{
 "cells": [
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "%env LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1\n",
    "%env LLM_API_KEY=sk-替换为自己的Qwen API_KEY"
   ],
   "id": "2bc8400f50043736"
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "af375836-b870-458b-87d1-4e00565977eb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:45.993682Z",
     "iopub.status.busy": "2024-12-04T14:08:45.993538Z",
     "iopub.status.idle": "2024-12-04T14:08:46.009173Z",
     "shell.execute_reply": "2024-12-04T14:08:46.008074Z",
     "shell.execute_reply.started": "2024-12-04T14:08:45.993670Z"
    },
    "papermill": {
     "duration": 0.115454,
     "end_time": "2024-11-23T14:29:00.919641",
     "exception": false,
     "start_time": "2024-11-23T14:29:00.804187",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%capture --no-stderr\n",
    "!pip install -U langchain langchain_community langchain_openai pypdf sentence_transformers chromadb shutil"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1e2c72b8-ee12-4130-af88-699998aa230c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:46.010002Z",
     "iopub.status.busy": "2024-12-04T14:08:46.009704Z",
     "iopub.status.idle": "2024-12-04T14:08:46.216697Z",
     "shell.execute_reply": "2024-12-04T14:08:46.216230Z",
     "shell.execute_reply.started": "2024-12-04T14:08:46.009989Z"
    },
    "papermill": {
     "duration": 0.319981,
     "end_time": "2024-11-23T14:29:01.380771",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.060790",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "841d2b02-ad06-40d2-b11f-c7adccec6ca2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:46.217338Z",
     "iopub.status.busy": "2024-12-04T14:08:46.217166Z",
     "iopub.status.idle": "2024-12-04T14:08:46.278237Z",
     "shell.execute_reply": "2024-12-04T14:08:46.276041Z",
     "shell.execute_reply.started": "2024-12-04T14:08:46.217325Z"
    },
    "papermill": {
     "duration": 0.121409,
     "end_time": "2024-11-23T14:29:01.638126",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.516717",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "expr_version = 'split_01_4_markdown_header_text_split_v3'\n",
    "\n",
    "preprocess_output_dir = os.path.join(os.path.pardir, 'outputs', 'v1_20240713')\n",
    "expr_dir = os.path.join(os.path.pardir, 'experiments', expr_version)\n",
    "\n",
    "os.makedirs(expr_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf7e81e3-4c82-4842-aef5-7592caaf1d39",
   "metadata": {
    "papermill": {
     "duration": 0.100379,
     "end_time": "2024-11-23T14:29:01.862379",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.762000",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 读取文档"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e6920e29-bc7d-4635-be06-d151eaf0e100",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:46.285357Z",
     "iopub.status.busy": "2024-12-04T14:08:46.284618Z",
     "iopub.status.idle": "2024-12-04T14:08:48.068863Z",
     "shell.execute_reply": "2024-12-04T14:08:48.068389Z",
     "shell.execute_reply.started": "2024-12-04T14:08:46.285289Z"
    },
    "papermill": {
     "duration": 2.012298,
     "end_time": "2024-11-23T14:29:03.974974",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.962676",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "\n",
    "loader = PyPDFLoader(os.path.join(os.path.pardir, 'data', '2024全球经济金融展望报告.pdf'))\n",
    "\n",
    "pdf_documents = loader.load()\n",
    "markdown_documents = open(os.path.join(os.path.pardir, 'outputs', 'MinerU_parsed_20241204', '2024全球经济金融展望报告.md')).read()\n",
    "\n",
    "qa_df = pd.read_excel(os.path.join(preprocess_output_dir, 'question_answer.xlsx'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "841ec659-4ad7-4e1f-b1ea-3477bf97fde3",
   "metadata": {
    "papermill": {
     "duration": 0.100297,
     "end_time": "2024-11-23T14:29:04.219302",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.119005",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 文档切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "74fe856a-7c19-4c3c-bb30-7abfa6298f74",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.069496Z",
     "iopub.status.busy": "2024-12-04T14:08:48.069331Z",
     "iopub.status.idle": "2024-12-04T14:08:48.077288Z",
     "shell.execute_reply": "2024-12-04T14:08:48.076848Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.069483Z"
    },
    "papermill": {
     "duration": 0.109229,
     "end_time": "2024-11-23T14:29:04.429069",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.319840",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "from langchain.text_splitter import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter\n",
    "from uuid import uuid4\n",
    "\n",
    "def split_pdf_docs(documents, filepath, chunk_size=400, chunk_overlap=40, seperators=['\\n\\n\\n', '\\n\\n'], force_split=False):\n",
    "    if os.path.exists(filepath) and not force_split:\n",
    "        print('found cache, restoring...')\n",
    "        return pickle.load(open(filepath, 'rb'))\n",
    "\n",
    "    splitter = RecursiveCharacterTextSplitter(\n",
    "        chunk_size=chunk_size,\n",
    "        chunk_overlap=chunk_overlap,\n",
    "        separators=seperators\n",
    "    )\n",
    "    split_docs = splitter.split_documents(documents)\n",
    "    for chunk in split_docs:\n",
    "        chunk.metadata['uuid'] = str(uuid4())\n",
    "\n",
    "    pickle.dump(split_docs, open(filepath, 'wb'))\n",
    "\n",
    "    return split_docs\n",
    "\n",
    "def split_md_docs(markdown_document):\n",
    "    headers_to_split_on = [\n",
    "        (\"#\", \"Header 1\"),\n",
    "        (\"##\", \"Header 2\"),\n",
    "        (\"###\", \"Header 3\"),\n",
    "    ]\n",
    "    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on, strip_headers=False)\n",
    "    md_header_splits = markdown_splitter.split_text(markdown_document)\n",
    "\n",
    "    return md_header_splits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "aa25540d-0504-4ae7-9804-9e3862b132d5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.077868Z",
     "iopub.status.busy": "2024-12-04T14:08:48.077744Z",
     "iopub.status.idle": "2024-12-04T14:08:48.090660Z",
     "shell.execute_reply": "2024-12-04T14:08:48.090247Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.077856Z"
    },
    "papermill": {
     "duration": 0.145583,
     "end_time": "2024-11-23T14:29:04.677429",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.531846",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "found cache, restoring...\n"
     ]
    }
   ],
   "source": [
    "pdf_splitted_docs = split_pdf_docs(pdf_documents, os.path.join(preprocess_output_dir, 'split_docs.pkl'), chunk_size=500, chunk_overlap=50)\n",
    "md_splitted_docs = split_md_docs(markdown_documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "28d8135e-3fda-4c3b-9c69-059a2f014219",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.091326Z",
     "iopub.status.busy": "2024-12-04T14:08:48.091144Z",
     "iopub.status.idle": "2024-12-04T14:08:48.095417Z",
     "shell.execute_reply": "2024-12-04T14:08:48.095005Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.091313Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "52"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(pdf_splitted_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c015e2ab-c5f6-4621-ba2a-9c7f26d887ae",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.095954Z",
     "iopub.status.busy": "2024-12-04T14:08:48.095827Z",
     "iopub.status.idle": "2024-12-04T14:08:48.101842Z",
     "shell.execute_reply": "2024-12-04T14:08:48.101430Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.095941Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content='研究院\\n全球经济金融展望报告\\n要点2024年年报（总第57期） 报告日期：2023年12月12日\\n●2023年全球经济增长动力持续回落，各国复苏分化，\\n发达经济体增速明显放缓，新兴经济体整体表现稳定。\\n全球贸易增长乏力，各国生产景气度逐渐回落，内需\\n对经济的拉动作用减弱。欧美央行货币政策紧缩态势\\n放缓，美元指数高位震荡后走弱，全球股市表现总体\\n好于预期，但区域分化明显。高利率环境抑制债券融\\n资需求，债券违约风险持续上升。\\n●展望2024年，预计全球经济复苏将依旧疲软，主要\\n经济体增长态势和货币政策走势将进一步分化。欧美\\n央行大概率结束本轮紧缩货币周期，美元指数将逐步\\n走弱，流向新兴经济体的跨境资本将增加。国际原油\\n市场短缺格局或延续，新能源发展成为重点。\\n●海湾六国经济发展与投资前景、高利率和高债务对\\n美国房地产市场脆弱性的影响等热点问题值得关注。中国银行研究院\\n全球经济金融研究课题组\\n组长：陈卫东\\n副组长：钟红\\n廖淑萍\\n成员：边卫红\\n熊启跃\\n王有鑫\\n曹鸿宇\\n李颖婷\\n王宁远\\n初晓\\n章凯莉\\n黄小军（纽约）\\n陆晓明（纽约）\\n黄承煜（纽约）\\n宋达志（伦敦）\\n李振龙（伦敦）\\n张传捷（伦敦）\\n刘冰彦（法兰克福）\\n温颍坤（法兰克福）\\n张明捷（法兰克福）\\n王哲（东京）\\n李彧（香港）\\n黎永康（香港）\\n联系人：王有鑫\\n电话：010-66594127\\n邮件：wangyouxin_hq@bank-of-china.com主要经济体GDP增速变化趋势（%）\\n资料来源：IMF，中国银行研究院', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 0, 'uuid': 'e73a0c9d-d42b-4350-a4c3-b38bf67c68a5'}),\n",
       " Document(page_content='全球经济金融展望报告\\n中国银行研究院 1 2024年\\n全球经济复苏疲软，货币政策取向分化\\n——中国银行全球经济金融展望报告（2024年）\\n2023年，全球经济增长动力持续回落。分区域看，各国复苏存在较大差异，\\n发达经济体增速明显放缓，新兴经济体增速与2022年大致持平。生产端，全球\\n供应链持续恢复，但生产景气度逐渐回落。需求端，内需对经济的拉动作用逐\\n渐减弱，各国国内投资和跨境投资均持续承压；全球货物贸易量指数和价格指\\n数下行，主要经济体出口贸易同比增速下降。欧美央行货币政策延续收紧态势，\\n但步伐整体放缓；金融体系短期资金运行发生结构性变化，“去存款化”特征\\n突出。美元指数高位震荡后走弱，全球股市表现总体好于预期，但区域分化显\\n著。高利率环境抑制债券融资需求，债券违约风险持续上升，美国政府债务可\\n持续性问题引发市场关注。展望2024年，预计全球经济复苏将依旧疲软，主要\\n经济体增长态势和货币政策将进一步分化。欧美央行大概率结束本轮加息周期，\\n日本央行可能退出负利率政策，跨境资本回流美国趋势将放缓，流向新兴经济\\n体的资金将增加。美元指数将逐步走弱，新兴经济体货币汇率有望回升。国际\\n原油市场短缺格局或延续，新能源发展成为重点。本期报告分别对海湾六国经\\n济发展与投资前景、高利率和高债务对美国房地产市场脆弱性的影响两个专题\\n展开分析。\\n一、全球经济回顾与展望\\n（一）全球经济将在波动分化中筑底复苏\\n2023年，全球经济增长动力持续回落，经济增速连续两年下降。受地缘政\\n治冲突、高通胀、货币政策紧缩等因素影响，全球经济下行压力加大。预计2023\\n年全球GDP增速为2.7%（市场汇率法），较2022年下降0.3个百分点。', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 2, 'uuid': '41d95288-441d-4c02-948a-6a3f0f4ef3ba'}),\n",
       " Document(page_content='全球经济金融展望报告\\n中国银行研究院 2 2024年\\n图1：全球GDP增速（%）\\n资料来源：IMF，中国银行研究院\\n分区域看，全球经济复苏不均衡，各国存在较大差异。发达经济体增速明\\n显放缓，预计2023年增速较2022年下降1个百分点。其中，欧元区和英国经\\n济增速大幅下降，美国表现好于其他发达经济体。2023年三季度，欧元区和英\\n国GDP环比增速均由之前的正增长转为负增长，分别下降0.1%和0.03%；美\\n国GDP环比增长折年率为4.9%，比二季度增速高2.8个百分点。新兴经济体增\\n速与2022年大致持平，预计2023年增速比2022年下降0.1个百分点。其中，\\n东南亚等出口型经济体增长承压，拉美、非洲等大宗商品出口国增速放缓，中\\n东欧国家经济增速加快（图2）。', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 3, 'uuid': '1f406690-b478-43cd-96f8-cd77924e300e'})]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pdf_splitted_docs[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "414feddc-648f-444b-9988-224e6e6b2fb1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.102424Z",
     "iopub.status.busy": "2024-12-04T14:08:48.102300Z",
     "iopub.status.idle": "2024-12-04T14:08:48.109574Z",
     "shell.execute_reply": "2024-12-04T14:08:48.109206Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.102411Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "47"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(md_splitted_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "543f6f4e-28c1-4238-ae99-9abab95c2318",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.110124Z",
     "iopub.status.busy": "2024-12-04T14:08:48.110000Z",
     "iopub.status.idle": "2024-12-04T14:08:48.119787Z",
     "shell.execute_reply": "2024-12-04T14:08:48.119399Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.110112Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'Header 1': '全球经济金融展望报告'}, page_content='# 全球经济金融展望报告  \\n2024年年报（总第57期）  \\n报告日期：2023年12月12日'),\n",
       " Document(metadata={'Header 1': '要点'}, page_content='# 要点  \\n●2023 年全球经济增长动力持续回落，各国复苏分化，发达经济体增速明显放缓，新兴经济体整体表现稳定。全球贸易增长乏力，各国生产景气度逐渐回落，内需对经济的拉动作用减弱。欧美央行货币政策紧缩态势放缓，美元指数高位震荡后走弱，全球股市表现总体好于预期，但区域分化明显。高利率环境抑制债券融资需求，债券违约风险持续上升。  \\n$\\\\bullet$ 展望2024 年，预计全球经济复苏将依旧疲软，主要经济体增长态势和货币政策走势将进一步分化。欧美央行大概率结束本轮紧缩货币周期，美元指数将逐步走弱，流向新兴经济体的跨境资本将增加。国际原油市场短缺格局或延续，新能源发展成为重点。  \\n$\\\\bullet$ 海湾六国经济发展与投资前景、高利率和高债务对美国房地产市场脆弱性的影响等热点问题值得关注。  \\n![](images/c7e6ce1606712e84e07a05bcf6016906efa3fc778e40fcd0e91ac4fcb5503b79.jpg)\\n主要经济体GDP 增速变化趋势（%）\\n资料来源：IMF，中国银行研究院'),\n",
       " Document(metadata={'Header 1': '中国银行研究院全球经济金融研究课题组'}, page_content='# 中国银行研究院全球经济金融研究课题组  \\n![](images/a5d0eb181c75231451c8f890ec50fe5822e2306a9beb543ca35a04880abbf639.jpg)  \\n联系人：王有鑫\\n电话：010-66594127\\n邮件： wangyouxin_hq@bank-of-china.com')]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "md_splitted_docs[:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4fb9cf39-1221-4b46-ab92-b300dc261c8e",
   "metadata": {},
   "source": [
    "## 检查一下切分后的块长度分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "c76b31aa-28af-430b-a62c-8879905176b7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.120518Z",
     "iopub.status.busy": "2024-12-04T14:08:48.120234Z",
     "iopub.status.idle": "2024-12-04T14:08:48.127374Z",
     "shell.execute_reply": "2024-12-04T14:08:48.126941Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.120505Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      52.000000\n",
       "mean      623.307692\n",
       "std       258.763920\n",
       "min        65.000000\n",
       "25%       476.750000\n",
       "50%       618.000000\n",
       "75%       801.250000\n",
       "max      1306.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series([len(d.page_content) for d in pdf_splitted_docs]).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "91e17fe4-4ef8-4768-932e-ed9cfb76eef6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.127926Z",
     "iopub.status.busy": "2024-12-04T14:08:48.127805Z",
     "iopub.status.idle": "2024-12-04T14:08:48.134871Z",
     "shell.execute_reply": "2024-12-04T14:08:48.134467Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.127914Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      47.000000\n",
       "mean      711.787234\n",
       "std       677.696191\n",
       "min         7.000000\n",
       "25%       244.000000\n",
       "50%       433.000000\n",
       "75%      1040.500000\n",
       "max      2862.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series([len(d.page_content) for d in md_splitted_docs]).describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7872f43-d308-4eed-9dc0-9ef73cd96ba9",
   "metadata": {},
   "source": [
    "## 检查超长块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "a0370f48-6a02-4aac-a841-5a911182a4af",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.137004Z",
     "iopub.status.busy": "2024-12-04T14:08:48.136869Z",
     "iopub.status.idle": "2024-12-04T14:08:48.141164Z",
     "shell.execute_reply": "2024-12-04T14:08:48.140793Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.136991Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='# （一）全球经济将在波动分化中筑底复苏  \n",
      "2023年，全球经济增长动力持续回落，经济增速连续两年下降。受地缘政治冲突、高通胀、货币政策紧缩等因素影响，全球经济下行压力加大。预计2023年全球GDP增速为 $2.7\\%$ （市场汇率法），较2022年下降0.3个百分点。  \n",
      "![](images/7600acb45b91442f8127f20629c791d91f04827835929cb12612c409fde82574.jpg)\n",
      "图1：全球GDP增速 $(\\%)$ ）  \n",
      "资料来源：IMF，中国银行研究院  \n",
      "分区域看，全球经济复苏不均衡，各国存在较大差异。发达经济体增速明显放缓，预计2023年增速较2022年下降1个百分点。其中，欧元区和英国经济增速大幅下降，美国表现好于其他发达经济体。2023年三季度，欧元区和英国GDP环比增速均由之前的正增长转为负增长，分别下降 $0.1\\%$ 和 $0.03\\%$ ；美国GDP环比增长折年率为 $4.9\\%$ ，比二季度增速高2.8个百分点。新兴经济体增速与2022年大致持平，预计2023年增速比2022年下降0.1个百分点。其中，东南亚等出口型经济体增长承压，拉美、非洲等大宗商品出口国增速放缓，中东欧国家经济增速加快（图2）。  \n",
      "![](images/abf30ccab508a0c4733d58e3810cda53dabdaeb4239acf37e57a931a0296d80c.jpg)\n",
      "图2：主要经济体GDP增速变化趋势（%）\n",
      "注：东盟五国包含印度尼西亚、马来西亚、菲律宾、新加坡和泰国。  \n",
      "资料来源：IMF，中国银行研究院  \n",
      "从生产端看，全球供应链持续恢复，但生产景气度逐渐回落。截至2023年10月底，纽约联储全球供应链压力指数降至有记录以来的最低值。荷兰经济分析局数据显示，全球工业生产量于4月触及年内低位，5-8月逐月回升，但发达经济体和新兴经济体分化明显（图3）。其中，主要新兴经济体工业生产指数普遍走高，如俄罗斯、土耳其、南非等，而发达经济体中的美国和韩国回升，英国、德国、意大利下行，日本波动较大，整体趋于平稳。全球融资环境收紧和经济下行压力对工业生产前景带来较大影响，全球制造业PMI指数明显回落，从2月的 $49.9\\%$ 降至10月的 $48.8\\%$ 。  \n",
      "![](images/7d2b17776c10d8fc38a113a20b40791a9e65da33b4209516d0bde88163bee3ea.jpg)\n",
      "图3：部分经济体工业生产指数变化趋势（2010年 $\\mathbf{-100}\\rangle$ ）\n",
      "资料来源：荷兰经济分析局，中国银行研究院  \n",
      "从需求端看，内需是支撑发达经济体增长的主要动力，但对经济的拉动作用逐渐减弱。美国消费未受加息明显影响，私人消费维持稳定增长，前三季度对美国经济增长的贡献率高达 $64.4\\%$ ；8-9月，美国零售和食品销售额连续两个月环比增速保持在 $0.7\\%$ 以上，高于市场预期，但10月增速大幅回落至 $-0.1\\%$ 。欧洲各国消费指数整体维持稳定（图4），是上半年免于陷入衰退的主要动力。但随着高利率和高通胀持续，对消费的影响逐渐释放，内需增长动力逐渐弱化，全球服务业PMI指数从二季度开始明显回落，从5月的 $55.5\\%$ 降至10月的 $50.4\\%$ 连续5个月下行；OECD消费者信心指数从7月开始连续3个月回落。  \n",
      "![](images/3876098e7c8b21ca208f46cd2b25aa420574a706ae2648c774fcf130fac892db.jpg)\n",
      "图4：部分欧洲国家零售销售指数\n",
      "注：除英国是以2019年为基年外，其他经济体均为2015年为基年。资料来源：Wind，中国银行研究院  \n",
      "发达经济体投资受加息政策影响较大，国内投资和跨境投资均持续承压。美国私人投资在2023年一季度触底后逐渐反弹，三季度存货及住宅投资恢复增长，带动私人投资增速提升至 $8.4\\%$ （经季调后环比折年率），但制造业和设备投资均放缓，环比增长折年率分别降低 $0.1\\%$ 和 $3.8\\%$ 。欧盟投资增速放缓，房地产投资减少。2023年二季度，欧元区固定资本形成总额环比增长 $0.1\\%$ ，比一季度增速下降0.3个百分点，房地产对GDP环比增长拉动率转为负值。在紧缩货币政策影响下，发达经济体企业部门宏观杠杆率下降，企业加杠杆或负债投资意愿不足。同2022年底相比，2023年二季度，美国、英国、法国、意大利和德国非金融企业部门负债率分别下降了2.4个、3.4个、4.0个、3.0个和1.3个百分点（图5）。IMF预测2023年全球投资率（投资占GDP的比重）将下降1.0个百分点至 $26.4\\%$ （图6），其中，欧盟将下降1.1个百分点，比发达经济体平均降幅高0.2个百分点。从跨境投资角度看，受地缘政治局势紧张、金融领域动荡加剧、高利率和投资审查趋严等影响，并购交易仍然疲软，而在全球产业链重塑背景下，东南亚等区域绿地投资恢复增长。联合国贸发会议预计2023年全球跨境直接投资将继续下行，但降幅较2022年收窄。  \n",
      "![](images/876898312b7f8b55b06bc9b09f7a585aebeb663d67fe281dfb4ed939588a8d6e.jpg)\n",
      "图5：部分发达国家非金融企业部门债务率（%）\n",
      "资料来源：IIF，中国银行研究院\n",
      "图6：全球投资率变化趋势 $(\\,\\%)$ ）  \n",
      "![](images/5c049cdfb254bf5b7720e25c09f7e2e434c5c77b74897dfb28ee5154f4ef318e.jpg)\n",
      "资料来源：IMF，中国银行研究院  \n",
      "从国际贸易角度看，全球货物贸易量和价格指数均承压下行，主要经济体出口贸易同比增速下降。荷兰经济分析局数据显示，2023年1-8月，全球货物贸易量指数和价格指数均震荡下行，8月数值比1月分别下降0.9个和4.3个点（图7）。10月，世贸组织将2023年全球货物贸易增速预测值下调0.9个百分点至 $0.8\\%$ ，2023年国际贸易增长或为近几年最低水平。但近期东亚、东南亚等主要经济体出口下行趋势收窄，贸易呈现企稳迹象。9-10月，越南出口结束连续10个月的负增长态势，同比分别增长 $5.0\\%$ 和 $6.7\\%$ 。10月，韩国出口同比增长 $5.1\\%$ ，是自2022年10月以来首次正增长。  \n",
      "![](images/5826ae44f43ef12c95089d898a8b9375a7e989c7ba7a6de6388cbdd174b65516.jpg)\n",
      "图7：全球货物贸易量指数和货物贸易价格指数（2010年 $\\mathbf{-100}.$ ）\n",
      "资料来源：荷兰经济分析局，中国银行研究院' metadata={'Header 1': '（一）全球经济将在波动分化中筑底复苏'}\n"
     ]
    }
   ],
   "source": [
    "for d in md_splitted_docs:\n",
    "    if len(d.page_content) > 2000:\n",
    "        print(d)\n",
    "        break"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ec5e814-0a7e-4910-b3d1-e56343aded72",
   "metadata": {},
   "source": [
    "考虑到有不少切片依然非常大，此处对较大的片段做二次切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "44de0a4c-df8f-4cd2-84ab-32d7c56df772",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.141752Z",
     "iopub.status.busy": "2024-12-04T14:08:48.141615Z",
     "iopub.status.idle": "2024-12-04T14:08:48.154171Z",
     "shell.execute_reply": "2024-12-04T14:08:48.153611Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.141738Z"
    }
   },
   "outputs": [],
   "source": [
    "from langchain.text_splitter import MarkdownTextSplitter\n",
    "\n",
    "new_md_splitted_docs = []\n",
    "splitter = MarkdownTextSplitter(\n",
    "    chunk_size=500,\n",
    "    chunk_overlap=50\n",
    ")\n",
    "for doc in md_splitted_docs:\n",
    "    if len(doc.page_content) > 700:\n",
    "        small_chunks = splitter.split_documents([doc])\n",
    "        # 把原始文档的标题回小片段的正文\n",
    "        for doc in small_chunks[1:]:\n",
    "            header_prefix = ''\n",
    "            for head_level in range(1, 4):\n",
    "                if f'Header {head_level}' in doc.metadata:\n",
    "                    header_prefix += '#' * head_level + ' ' + doc.metadata[f'Header {head_level}'] + '\\n'\n",
    "            doc.page_content = header_prefix + doc.page_content\n",
    "        \n",
    "        new_md_splitted_docs.extend(small_chunks)\n",
    "    else:\n",
    "        new_md_splitted_docs.append(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d1ef80c2-b46c-4a4c-abcb-8e1c9dfc836e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.155093Z",
     "iopub.status.busy": "2024-12-04T14:08:48.154746Z",
     "iopub.status.idle": "2024-12-04T14:08:48.162538Z",
     "shell.execute_reply": "2024-12-04T14:08:48.161985Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.155075Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "102"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(new_md_splitted_docs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "220dbc3a-fceb-4e49-a3f1-01e16660b2a6",
   "metadata": {
    "papermill": {
     "duration": 0.100209,
     "end_time": "2024-11-23T14:29:05.255871",
     "exception": false,
     "start_time": "2024-11-23T14:29:05.155662",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "8598a11c-25d8-4af1-a98b-06a8c394e261",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:48.163245Z",
     "iopub.status.busy": "2024-12-04T14:08:48.163084Z",
     "iopub.status.idle": "2024-12-04T14:08:49.005973Z",
     "shell.execute_reply": "2024-12-04T14:08:49.005603Z",
     "shell.execute_reply.started": "2024-12-04T14:08:48.163229Z"
    },
    "papermill": {
     "duration": 0.989203,
     "end_time": "2024-11-23T14:29:06.345534",
     "exception": false,
     "start_time": "2024-11-23T14:29:05.356331",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "device: cuda\n"
     ]
    }
   ],
   "source": [
    "from langchain.embeddings import HuggingFaceBgeEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "import torch\n",
    "\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "print(f'device: {device}')\n",
    "\n",
    "def get_embeddings(model_path):\n",
    "    embeddings = HuggingFaceBgeEmbeddings(\n",
    "        model_name=model_path,\n",
    "        model_kwargs={'device': device},\n",
    "        encode_kwargs={'normalize_embeddings': True},\n",
    "        # show_progress=True\n",
    "        query_instruction='为这个句子生成表示以用于检索相关文章：'\n",
    "    )\n",
    "    return embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "f6f46c73-7369-448f-a89a-ed3d817cad47",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:49.006812Z",
     "iopub.status.busy": "2024-12-04T14:08:49.006470Z",
     "iopub.status.idle": "2024-12-04T14:08:51.899869Z",
     "shell.execute_reply": "2024-12-04T14:08:51.899356Z",
     "shell.execute_reply.started": "2024-12-04T14:08:49.006798Z"
    },
    "papermill": {
     "duration": 83.983138,
     "end_time": "2024-11-23T14:35:06.117207",
     "exception": false,
     "start_time": "2024-11-23T14:33:42.134069",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "from tqdm.auto import tqdm\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "model_path = 'BAAI/bge-large-zh-v1.5'\n",
    "embeddings = get_embeddings(model_path)\n",
    "\n",
    "def get_vector_db(splitted_docs, embeddings, name):\n",
    "    persist_directory = os.path.join(expr_dir, 'chroma', 'bge', name)\n",
    "    shutil.rmtree(persist_directory, ignore_errors=True)\n",
    "    vector_db = Chroma.from_documents(\n",
    "        splitted_docs,\n",
    "        embedding=embeddings,\n",
    "        persist_directory=persist_directory\n",
    "    )\n",
    "    return vector_db"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3318f9bb-a7f8-4c44-bf8d-302b71dca44c",
   "metadata": {},
   "source": [
    "使用新的切分方式，每个切片的UUID跟原始切片不一致了，检索的Ground Truth丢失了，此处通过向量检索的方式，将原始的UUID复制到Markdown的切片上，方便后续排查检索问题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "b48362cc-5776-4f1c-8feb-64b1a4a675e8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:08:51.900726Z",
     "iopub.status.busy": "2024-12-04T14:08:51.900450Z",
     "iopub.status.idle": "2024-12-04T14:09:08.816741Z",
     "shell.execute_reply": "2024-12-04T14:09:08.814348Z",
     "shell.execute_reply.started": "2024-12-04T14:08:51.900714Z"
    }
   },
   "outputs": [],
   "source": [
    "pdf_vector_db = get_vector_db(pdf_splitted_docs, embeddings, 'pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "dabf2d44-5afa-41f4-bd6c-1cbaaf00e571",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:08.820703Z",
     "iopub.status.busy": "2024-12-04T14:09:08.819752Z",
     "iopub.status.idle": "2024-12-04T14:09:14.124996Z",
     "shell.execute_reply": "2024-12-04T14:09:14.124526Z",
     "shell.execute_reply.started": "2024-12-04T14:09:08.820630Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1242952/1778381455.py:5: UserWarning: Relevance scores must be between 0 and 1, got [(Document(metadata={'page': 46, 'source': 'data/2024全球经济金融展望报告.pdf', 'uuid': '873ee876-8549-49b9-b182-6584113b2701'}, page_content='全球经济金融展望报告\\n中国银行研究院 45 2024年\\n第四，中海深化经贸合作将为人民币国际化搭建新的平台。“石油美元”\\n体系对于非美元国家能源贸易存在不同程度的制约。在该体系下，中东石油出\\n口国需要在一定程度上放弃独立的货币政策，承担较大的汇率风险。尤其是近\\n年来美元逐渐被“政治化”，各国对于贸易计价与结算货币多元化的需求不断\\n增加。当前，中国与海湾六国拥有紧密的经贸联系，并且随着19个阿拉伯国家\\n相继加入“一带一路”倡议，人民币使用场景不断拓宽，为人民币国际化创造\\n了有利条件。未来，中海有望以双边经贸为依托，将人民币跨境贸易支付作为\\n人民币在中东地区使用的主要切入点，扩展与海湾六国在人民币储备和投资等\\n领域的货币合作，共同开发绿色金融，充分释放中海双方在推动人民币国际化\\n上的合作潜力。\\n专题二：高利率、高债务与美国房地产市场脆弱性\\n近期，美联储考察了影响金融稳定的风险因素，商业和居住房地产风险的\\n排名从2023年5月的第四位上升至10月的第二位，仅次于持续通胀与货币政\\n策紧缩风险。美国房地产市场的演进路径及风险传染引发市场高度关注。\\n（一）居住房地产市场\\n2023年二季度，美国居住房地产市值余额为56.3万亿美元，不仅远大于商\\n业房地产的24万亿美元，且在各类资产中排名第一。房地产在居民净资产中占\\n比达30%，房贷在居民债务中占比达66%，该市场的走势对美国金融市场、经\\n济走势影响巨大。鉴于该市场对利率及债务高度敏感，美联储加息及居民债务\\n负担上升将增加金融脆弱性。2022年3月以来，美联储激进加息，30年期住房\\n抵押贷款固定利率月均值从4%的历史低位逐渐上升至2023年10月的7.6%，远\\n高于次贷危机发生之前的6.5%。与次贷危机相比，本轮货币紧缩周期中居住房\\n地产市场整体相对稳定，但潜在风险可能上升。\\n第一，在疫情发生后，美国居住房地产市场需求持续上升。美联储对银行'), -0.05529412693199687)]\n",
      "  chunk_score_pair = pdf_vector_db.similarity_search_with_relevance_scores(query, k=1)[0]\n",
      "/tmp/ipykernel_1242952/1778381455.py:5: UserWarning: Relevance scores must be between 0 and 1, got [(Document(metadata={'page': 33, 'source': 'data/2024全球经济金融展望报告.pdf', 'uuid': '3e312dfa-dd43-4ab9-961f-a2e442c89cdd'}, page_content='全球经济金融展望报告\\n中国银行研究院 32 2024年\\n图19：美国联邦基金目标利率与全球MSCI指数\\n资料来源：Wind，中国银行研究院\\n表3：全球主要股指概览\\n注：涨跌幅区间为2023年1月1日至2023年11月15日，收盘价和市盈\\n率为2023年11月15日。\\n资料来源：Wind，中国银行研究院'), -0.04982325594463921)]\n",
      "  chunk_score_pair = pdf_vector_db.similarity_search_with_relevance_scores(query, k=1)[0]\n"
     ]
    }
   ],
   "source": [
    "# 以新切片作为query，查询旧切片中最相似的那个，将它的UUID复制到新切片中\n",
    "for doc in new_md_splitted_docs:\n",
    "    query = doc.page_content\n",
    "    # 只检索最相似的那个\n",
    "    chunk_score_pair = pdf_vector_db.similarity_search_with_relevance_scores(query, k=1)[0]\n",
    "    doc.metadata['uuid'] = chunk_score_pair[0].metadata['uuid']\n",
    "    doc.metadata['pdf_chunk_sim'] = chunk_score_pair[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5a655299-15f5-44b1-925d-5137a1e1c881",
   "metadata": {},
   "source": [
    "chunk_score_pair的结构如下"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "46817374-ceb2-486a-a7f2-240c2abc98f0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:14.125580Z",
     "iopub.status.busy": "2024-12-04T14:09:14.125453Z",
     "iopub.status.idle": "2024-12-04T14:09:14.128455Z",
     "shell.execute_reply": "2024-12-04T14:09:14.128069Z",
     "shell.execute_reply.started": "2024-12-04T14:09:14.125567Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Document(metadata={'page': 51, 'source': 'data/2024全球经济金融展望报告.pdf', 'uuid': 'ebf0d999-59f6-4fd3-941e-05a7a60c255a'}, page_content='免责声明\\n本研究报告由中国银行研究院撰写，研究报告中所引用信息均来自公开资料。\\n本研究报告中包含的观点或估计仅代表作者迄今为止的判断，它们不一定反映中国银行的观点。中国\\n银行研究院可以不经通知加以改变，且没有对此报告更新、修正或修改的责任。\\n本研究报告内容及观点仅供参考，不构成任何投资建议。对于本报告所提供信息所导致的任何直接的\\n或者间接的投资盈亏后果不承担任何责任。\\n本研究报告版权仅为中国银行研究院所有，未经书面许可，任何机构和个人不得以任何形式翻版、复\\n制和发布。如引用发布，需注明出处为中国银行研究院，且不得对本报告进行有悖原意的引用、删节和修\\n改。中国银行研究院保留对任何侵权行为和有悖报告原意的引用行为进行追究的权利。'),\n",
       " 0.8652316217743071)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chunk_score_pair"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "62560d74-7d90-4e69-ae43-162b248e1622",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:14.129048Z",
     "iopub.status.busy": "2024-12-04T14:09:14.128888Z",
     "iopub.status.idle": "2024-12-04T14:09:32.467033Z",
     "shell.execute_reply": "2024-12-04T14:09:32.464693Z",
     "shell.execute_reply.started": "2024-12-04T14:09:14.129037Z"
    }
   },
   "outputs": [],
   "source": [
    "md_vector_db = get_vector_db(new_md_splitted_docs, embeddings, 'md')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55d51ebc-b29d-45be-b8c7-1d5610b270b8",
   "metadata": {},
   "source": [
    "# 计算检索准确率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "ad8ef473-7ad8-43d4-8b9a-9890cf3bf4c6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:32.470567Z",
     "iopub.status.busy": "2024-12-04T14:09:32.469829Z",
     "iopub.status.idle": "2024-12-04T14:09:32.482778Z",
     "shell.execute_reply": "2024-12-04T14:09:32.480623Z",
     "shell.execute_reply.started": "2024-12-04T14:09:32.470500Z"
    }
   },
   "outputs": [],
   "source": [
    "test_df = qa_df[(qa_df['dataset'] == 'test') & (qa_df['qa_type'] == 'detailed')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "070b78ef-3140-4e59-886c-09c5184a8ee9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:32.485837Z",
     "iopub.status.busy": "2024-12-04T14:09:32.485114Z",
     "iopub.status.idle": "2024-12-04T14:09:32.500896Z",
     "shell.execute_reply": "2024-12-04T14:09:32.498553Z",
     "shell.execute_reply.started": "2024-12-04T14:09:32.485768Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "93"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(test_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "435148a0-b2b1-49fb-8eea-2ad117c0b9d4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:32.504966Z",
     "iopub.status.busy": "2024-12-04T14:09:32.504221Z",
     "iopub.status.idle": "2024-12-04T14:09:32.517808Z",
     "shell.execute_reply": "2024-12-04T14:09:32.516408Z",
     "shell.execute_reply.started": "2024-12-04T14:09:32.504900Z"
    }
   },
   "outputs": [],
   "source": [
    "def get_hit_stat_df(vector_db, top_k_arr=list(range(1, 9))):\n",
    "    hit_stat_data = []\n",
    "\n",
    "    for k in tqdm(top_k_arr):\n",
    "        for idx, row in test_df.iterrows():\n",
    "            question = row['question']\n",
    "            true_uuid = row['uuid']\n",
    "            # chunks = retrieve_fn(question, k=k)\n",
    "            chunks = vector_db.similarity_search(question, k=k)\n",
    "            retrieved_uuids = [doc.metadata['uuid'] for doc in chunks]\n",
    "\n",
    "            hit_stat_data.append({\n",
    "                'question': question,\n",
    "                'top_k': k,\n",
    "                'hit': int(true_uuid in retrieved_uuids),\n",
    "                'retrieved_chunks': len(chunks)\n",
    "            })\n",
    "    hit_stat_df = pd.DataFrame(hit_stat_data)\n",
    "    return hit_stat_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "01e01af2-9f53-462a-bcb1-2864864e6488",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:32.519713Z",
     "iopub.status.busy": "2024-12-04T14:09:32.519341Z",
     "iopub.status.idle": "2024-12-04T14:09:52.519790Z",
     "shell.execute_reply": "2024-12-04T14:09:52.519445Z",
     "shell.execute_reply.started": "2024-12-04T14:09:32.519677Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2b59081cd10641669bdd48c4f946cc25",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/8 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "hit_stat_df = get_hit_stat_df(md_vector_db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "de0c3de0-92b5-4804-a374-108984640cf8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:52.520366Z",
     "iopub.status.busy": "2024-12-04T14:09:52.520236Z",
     "iopub.status.idle": "2024-12-04T14:09:52.526605Z",
     "shell.execute_reply": "2024-12-04T14:09:52.526209Z",
     "shell.execute_reply.started": "2024-12-04T14:09:52.520354Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>top_k</th>\n",
       "      <th>hit_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.397849</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0.494624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0.537634</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0.612903</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.645161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>0.677419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>0.720430</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>0.731183</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   top_k  hit_rate\n",
       "0      1  0.397849\n",
       "1      2  0.494624\n",
       "2      3  0.537634\n",
       "3      4  0.612903\n",
       "4      5  0.645161\n",
       "5      6  0.677419\n",
       "6      7  0.720430\n",
       "7      8  0.731183"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hit_stat_df.groupby(['top_k'])['hit'].mean().reset_index().rename(columns={'hit': 'hit_rate'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "ccc0dca9-8ad6-4d0c-a6e1-8279babbdfbf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:52.527276Z",
     "iopub.status.busy": "2024-12-04T14:09:52.527086Z",
     "iopub.status.idle": "2024-12-04T14:09:52.898154Z",
     "shell.execute_reply": "2024-12-04T14:09:52.897701Z",
     "shell.execute_reply.started": "2024-12-04T14:09:52.527264Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='top_k', ylabel='hit'>"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGxCAYAAACeKZf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAmRklEQVR4nO3df1SUdd7/8dcwyiCpmCGgRLJqiVhCC8Gid+kWLbd52rjve7upY8FOLvf5bk5LO9/6KrkLWenYVoSnPJIm2V3ratttP/bO6McUtm60GOaubmW5m0LpgJ42MCqomfn+0WlaVjA0mGv48Hycc53jXPO5mPe1nban11zD2ILBYFAAAACGiLJ6AAAAgIFE3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwygirBwi3QCCgQ4cOacyYMbLZbFaPAwAA+iEYDOrYsWOaNGmSoqJOfG1m2MXNoUOHlJKSYvUYAADgFLS0tOjMM8884ZphFzdjxoyR9OX/OGPHjrV4GgAA0B8dHR1KSUkJ/Xf8RIZd3Hz1VtTYsWOJGwAAhpj+3FLCDcUAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIwywuoBAADAwLj11lutHmFAfNvz4MoNAAAwCnEDAACMQtwAAACjEDcAAMAo3FAMADDOWytesnqEATFj2cVWjzAkceUGAAAYhbgBAABGIW4AAIBRiBsAAGCUiIibNWvWKDU1VTExMcrNzVVjY2Ofa+fNmyebzXbctmDBgjBODAAAIpXlcbNlyxa53W5VVlZq165dysjIUEFBgdra2npdv3XrVh0+fDi07d27V3a7XVdeeWWYJwcAAJHI8ripqqpSaWmpnE6n0tPTVVNTo9jYWNXW1va6fvz48UpKSgptL7zwgmJjY4kbAAAgyeK46e7uVlNTk/Lz80P7oqKilJ+fr4aGhn79jA0bNuiqq67SaaedNlhjAgCAIcTSX+J39OhR+f1+JSYm9tifmJiot99++xuPb2xs1N69e7Vhw4Y+13R1damrqyv0uKOj49QHBgAAEc/yt6W+jQ0bNui8885TTk5On2s8Ho/i4uJCW0pKShgnBAAA4WZp3MTHx8tut6u1tbXH/tbWViUlJZ3w2M7OTm3evFmLFi064bry8nK1t7eHtpaWlm89NwAAiFyWxk10dLSysrLk9XpD+wKBgLxer/Ly8k547G9/+1t1dXXpmmuuOeE6h8OhsWPH9tgAAIC5LP/iTLfbrZKSEmVnZysnJ0fV1dXq7OyU0+mUJBUXFys5OVkej6fHcRs2bFBhYaHOOOMMK8YGAAARyvK4KSoq0pEjR1RRUSGfz6fMzEzV1dWFbjJubm5WVFTPC0z79u3Tjh079Pzzz1sxMgAAiGCWx40kuVwuuVyuXp+rr68/bt/06dMVDAYHeSoAADAUDelPSwEAAPwz4gYAABiFuAEAAEYhbgAAgFEi4oZiAMDgWHHNj6weYUAse/Rxq0fAEMKVGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARhlh9QAAEA73/9/fWT3CgHDdc7nVIwARjys3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMMsLqAQCE1/aL5lo9woCY+8p2q0cAEKEsv3KzZs0apaamKiYmRrm5uWpsbDzh+o8++kiLFy/WxIkT5XA4dM4552jbtm1hmhYAAEQ6S6/cbNmyRW63WzU1NcrNzVV1dbUKCgq0b98+
JSQkHLe+u7tbl156qRISEvT4448rOTlZBw8e1Lhx48I/PAAAiEiWxk1VVZVKS0vldDolSTU1NXrmmWdUW1urpUuXHre+trZWH374oV599VWNHDlSkpSamhrOkQEAQISz7G2p7u5uNTU1KT8//+thoqKUn5+vhoaGXo95+umnlZeXp8WLFysxMVHnnnuuVq5cKb/fH66xAQBAhLPsys3Ro0fl9/uVmJjYY39iYqLefvvtXo/529/+ppdeekkLFy7Utm3btH//fl1//fX6/PPPVVlZ2esxXV1d6urqCj3u6OgYuJMAAAARx/Ibik9GIBBQQkKC1q1bp6ysLBUVFWnZsmWqqanp8xiPx6O4uLjQlpKSEsaJAQBAuFkWN/Hx8bLb7Wptbe2xv7W1VUlJSb0eM3HiRJ1zzjmy2+2hfTNmzJDP51N3d3evx5SXl6u9vT20tbS0DNxJAACAiGNZ3ERHRysrK0terze0LxAIyOv1Ki8vr9dj5syZo/379ysQCIT2vfPOO5o4caKio6N7PcbhcGjs2LE9NgAAYC5L35Zyu91av369Hn74Yb311lv66U9/qs7OztCnp4qLi1VeXh5a/9Of/lQffvihysrK9M477+iZZ57RypUrtXjxYqtOAQAARBhLPwpeVFSkI0eOqKKiQj6fT5mZmaqrqwvdZNzc3KyoqK/7KyUlRc8995x+/vOfa9asWUpOTlZZWZmWLFli1SkAAIAIY/nXL7hcLrlcrl6fq6+vP25fXl6eXnvttUGeCgAADFVD6tNSAAAA34S4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYJQRVg8AWGXOfXOsHmFA/OGGP1g9AgBEFK7cAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjBIRcbNmzRqlpqYqJiZGubm5amxs7HPtxo0bZbPZemwxMTFhnBYAAEQyy+Nmy5Ytcrvdqqys1K5du5SRkaGCggK1tbX1eczYsWN1+PDh0Hbw4MEwTgwAACKZ5XFTVVWl0tJSOZ1Opaenq6amRrGxsaqtre3zGJvNpqSkpNCWmJgYxokBAEAkszRuuru71dTUpPz8/NC+qKgo5efnq6Ghoc/jPv74Y02ePFkpKSm64oor9Je//CUc4wIAgCHA0rg5evSo/H7/cVdeEhMT5fP5ej1m+vTpqq2t1VNPPaVHH31UgUBAs2fP1vvvv9/r+q6uLnV0dPTYAACAuSx/W+pk5eXlqbi4WJmZmZo7d662bt2qCRMm6IEHHuh1vcfjUVxcXGhLSUkJ88QAACCcLI2b+Ph42e12tba29tjf2tqqpKSkfv2MkSNH6vzzz9f+/ft7fb68vFzt7e2hraWl5VvPDQAAItcIK188OjpaWVlZ8nq9KiwslCQFAgF5vV65XK5+/Qy/3689e/bosssu6/V5h8Mhh8MxUCMbqfm286weYUCcVbHH6hEAABHA0riRJLfbrZKSEmVnZysnJ0fV1dXq7OyU0+mUJBUXFys5OVkej0eSdNttt+l73/uepk2bpo8++kh33XWXDh48qJ/85CdWngYAAIgQlsdNUVGRjhw5ooqKCvl8PmVmZqquri50k3Fzc7Oior5+9+zvf/+7SktL5fP5dPrppysrK0uvvvqq0tPTrToFAAAQQSyPG0lyuVx9vg1VX1/f4/G9996re++9NwxTAQCAoWjIfVoKAADgRIgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAY
hbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABglIiImzVr1ig1NVUxMTHKzc1VY2Njv47bvHmzbDabCgsLB3dAAAAwZFgeN1u2bJHb7VZlZaV27dqljIwMFRQUqK2t7YTHHThwQDfddJMuvPDCME0KAACGAsvjpqqqSqWlpXI6nUpPT1dNTY1iY2NVW1vb5zF+v18LFy7U8uXLNWXKlDBOCwAAIp2lcdPd3a2mpibl5+eH9kVFRSk/P18NDQ19HnfbbbcpISFBixYt+sbX6OrqUkdHR48NAACYy9K4OXr0qPx+vxITE3vsT0xMlM/n6/WYHTt2aMOGDVq/fn2/XsPj8SguLi60paSkfOu5AQBA5Bph9QAn49ixY7r22mu1fv16xcfH9+uY8vJyud3u0OOOjo4+Ayfr5v8ekDmt1nRXsdUjAABgmVOKm4svvlhbt27VuHHjeuzv6OhQYWGhXnrppX79nPj4eNntdrW2tvbY39raqqSkpOPW//Wvf9WBAwd0+eWXh/YFAgFJ0ogRI7Rv3z5NnTq1xzEOh0MOh6Nf8wAAgKHvlN6Wqq+vV3d393H7P/vsM/3+97/v98+Jjo5WVlaWvF5vaF8gEJDX61VeXt5x69PS0rRnzx7t3r07tP3whz/U97//fe3evZu3nAAAwMldufnzn/8c+vObb77Z474Yv9+vuro6JScnn9QAbrdbJSUlys7OVk5Ojqqrq9XZ2Smn0ylJKi4uVnJysjwej2JiYnTuuef2OP6rq0f/vB8AAAxPJxU3mZmZstlsstlsuvjii497ftSoUbrvvvtOaoCioiIdOXJEFRUV8vl8yszMVF1dXegm4+bmZkVFWf6JdQAAMEScVNy89957CgaDmjJlihobGzVhwoTQc9HR0UpISJDdbj/pIVwul1wuV6/P1dfXn/DYjRs3nvTrAQAAc51U3EyePFnS1zfxAgAARJp+x83TTz+t+fPna+TIkXr66adPuPaHP/zhtx4MAADgVPQ7bgoLC+Xz+ZSQkHDCL6q02Wzy+/0DMRsAAMBJ63fc/ONbUbwtBQAAItUp/4Zir9crr9ertra2HrFjs9m0YcOGARkOAADgZJ1S3Cxfvly33XabsrOzNXHiRNlstoGeCwAA4JScUtzU1NRo48aNuvbaawd6HgAAgG/llH47Xnd3t2bPnj3QswAAAHxrpxQ3P/nJT7Rp06aBngUAAOBb6/fbUm63O/TnQCCgdevW6cUXX9SsWbM0cuTIHmurqqoGbkIAAICT0O+4eeONN3o8zszMlCTt3bu3x35uLgYAAFbqd9y8/PLLgzkHAADAgODrtgEAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFEiIm7WrFmj1NRUxcTEKDc3V42NjX2u3bp1q7KzszVu3DiddtppyszM1COPPBLGaQEAQCSzPG62bNkit9utyspK7dq1SxkZGSooKFBbW1uv68ePH69ly5apoaFBf/7zn+V0OuV0OvXcc8+FeXIAABCJLI+bqqoqlZaWyul0Kj09XTU1NYqNjVVtbW2v6+fNm6d/+7d/04wZMzR16lSVlZVp1qxZ2rFjR5gnBwAAkcjSuOnu7lZTU5Py8/ND+6KiopSfn6+GhoZvPD4YDMrr9Wrfvn266KKLel3T1dWljo6OHhsAADCXpXFz9OhR+f1+JSYm9tifmJgon8/X53Ht7e0aPXq0oqOj
tWDBAt1333269NJLe13r8XgUFxcX2lJSUgb0HAAAQGSx/G2pUzFmzBjt3r1bO3fu1IoVK+R2u1VfX9/r2vLycrW3t4e2lpaW8A4LAADCaoSVLx4fHy+73a7W1tYe+1tbW5WUlNTncVFRUZo2bZokKTMzU2+99ZY8Ho/mzZt33FqHwyGHwzGgcwMAgMhl6ZWb6OhoZWVlyev1hvYFAgF5vV7l5eX1++cEAgF1dXUNxogAAGCIsfTKjSS53W6VlJQoOztbOTk5qq6uVmdnp5xOpySpuLhYycnJ8ng8kr68hyY7O1tTp05VV1eXtm3bpkceeURr16618jQAAECEsDxuioqKdOTIEVVUVMjn8ykzM1N1dXWhm4ybm5sVFfX1BabOzk5df/31ev/99zVq1CilpaXp0UcfVVFRkVWnAAAAIojlcSNJLpdLLper1+f++UbhO+64Q3fccUcYpgIAAEPRkPy0FAAAQF+IGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFEiIm7WrFmj1NRUxcTEKDc3V42NjX2uXb9+vS688EKdfvrpOv3005Wfn3/C9QAAYHixPG62bNkit9utyspK7dq1SxkZGSooKFBbW1uv6+vr63X11Vfr5ZdfVkNDg1JSUvSDH/xAH3zwQZgnBwAAkcjyuKmqqlJpaamcTqfS09NVU1Oj2NhY1dbW9rr+17/+ta6//nplZmYqLS1NDz74oAKBgLxeb5gnBwAAkcjSuOnu7lZTU5Py8/ND+6KiopSfn6+GhoZ+/YxPPvlEn3/+ucaPH9/r811dXero6OixAQAAc1kaN0ePHpXf71diYmKP/YmJifL5fP36GUuWLNGkSZN6BNI/8ng8iouLC20pKSnfem4AABC5LH9b6ttYtWqVNm/erCeeeEIxMTG9rikvL1d7e3toa2lpCfOUAAAgnEZY+eLx8fGy2+1qbW3tsb+1tVVJSUknPPbuu+/WqlWr9OKLL2rWrFl9rnM4HHI4HAMyLwAAiHyWXrmJjo5WVlZWj5uBv7o5OC8vr8/jfvWrX+n2229XXV2dsrOzwzEqAAAYIiy9ciNJbrdbJSUlys7OVk5Ojqqrq9XZ2Smn0ylJKi4uVnJysjwejyTpzjvvVEVFhTZt2qTU1NTQvTmjR4/W6NGjLTsPAAAQGSyPm6KiIh05ckQVFRXy+XzKzMxUXV1d6Cbj5uZmRUV9fYFp7dq16u7u1o9+9KMeP6eyslK33nprOEcHAAARyPK4kSSXyyWXy9Xrc/X19T0eHzhwYPAHAgAAQ9aQ/rQUAADAPyNuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARrE8btasWaPU1FTFxMQoNzdXjY2Nfa79y1/+ov/4j/9QamqqbDabqqurwzcoAAAYEiyNmy1btsjtdquyslK7du1SRkaGCgoK1NbW1uv6Tz75RFOmTNGqVauUlJQU5mkBAMBQYGncVFVVqbS0VE6nU+np6aqpqVFsbKxqa2t7XX/BBRforrvu0lVXXSWH
wxHmaQEAwFBgWdx0d3erqalJ+fn5Xw8TFaX8/Hw1NDQM2Ot0dXWpo6OjxwYAAMxlWdwcPXpUfr9fiYmJPfYnJibK5/MN2Ot4PB7FxcWFtpSUlAH72QAAIPJYfkPxYCsvL1d7e3toa2lpsXokAAAwiEZY9cLx8fGy2+1qbW3tsb+1tXVAbxZ2OBzcnwMAwDBi2ZWb6OhoZWVlyev1hvYFAgF5vV7l5eVZNRYAABjiLLtyI0lut1slJSXKzs5WTk6Oqqur1dnZKafTKUkqLi5WcnKyPB6PpC9vQn7zzTdDf/7ggw+0e/dujR49WtOmTbPsPAAAQOSwNG6Kiop05MgRVVRUyOfzKTMzU3V1daGbjJubmxUV9fXFpUOHDun8888PPb777rt19913a+7cuaqvrw/3+AAAIAJZGjeS5HK55HK5en3un4MlNTVVwWAwDFMBAIChyvhPSwEAgOGFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABglIuJmzZo1Sk1NVUxMjHJzc9XY2HjC9b/97W+VlpammJgYnXfeedq2bVuYJgUAAJHO8rjZsmWL3G63KisrtWvXLmVkZKigoEBtbW29rn/11Vd19dVXa9GiRXrjjTdUWFiowsJC7d27N8yTAwCASGR53FRVVam0tFROp1Pp6emqqalRbGysamtre12/evVq/eu//qtuvvlmzZgxQ7fffru++93v6v777w/z5AAAIBJZGjfd3d1qampSfn5+aF9UVJTy8/PV0NDQ6zENDQ091ktSQUFBn+sBAMDwMsLKFz969Kj8fr8SExN77E9MTNTbb7/d6zE+n6/X9T6fr9f1XV1d6urqCj1ub2+XJHV0dBy31t/16UnNH6l6O7cTOfaZf5AmCa+TPe8vPv1ikCYJr5M9784vhud5f9r1ySBNEl4ne96fff75IE0SXid73h9/1jlIk4TXyZ73P/73bijr7by/2hcMBr/xeEvjJhw8Ho+WL19+3P6UlBQLpgmPuPv+j9UjWMMTZ/UElohbMjzPW3HD87z/3xqrJ7DGHY8Nz3/eusPqAayxatWqPp87duyY4r7h339L4yY+Pl52u12tra099re2tiopKanXY5KSkk5qfXl5udxud+hxIBDQhx9+qDPOOEM2m+1bnsHJ6ejoUEpKilpaWjR27NiwvraVOG/OezjgvDnv4cDK8w4Ggzp27JgmTZr0jWstjZvo6GhlZWXJ6/WqsLBQ0pfx4fV65XK5ej0mLy9PXq9XN954Y2jfCy+8oLy8vF7XOxwOORyOHvvGjRs3EOOfsrFjxw6rfxm+wnkPL5z38MJ5Dy9Wnfc3XbH5iuVvS7ndbpWUlCg7O1s5OTmqrq5WZ2ennE6nJKm4uFjJycnyeDySpLKyMs2dO1f33HOPFixYoM2bN+v111/XunXrrDwNAAAQISyPm6KiIh05ckQVFRXy+XzKzMxUXV1d6Kbh5uZmRUV9/aGu2bNna9OmTfrFL36hW265RWeffbaefPJJnXvuuVadAgAAiCCWx40kuVyuPt+Gqq+vP27flVdeqSuvvHKQpxp4DodDlZWVx71NZjrOm/MeDjhvzns4GCrnbQv25zNVAAAAQ4Tlv6EYAABgIBE3AADAKMQNAAAwCnETBq+88oouv/xyTZo0STabTU8++aTVI4WFx+PRBRdcoDFjxighIUGFhYXat2+f1WMNurVr12rWrFmh3wORl5enZ5991uqxwm7VqlWy2Ww9fieViW699VbZbLYeW1pa
mtVjhcUHH3yga665RmeccYZGjRql8847T6+//rrVYw2q1NTU4/5522w2LV682OrRBpXf79cvf/lLfec739GoUaM0depU3X777f36KgQrRMSnpUzX2dmpjIwMXXfddfr3f/93q8cJm+3bt2vx4sW64IIL9MUXX+iWW27RD37wA7355ps67bTTrB5v0Jx55platWqVzj77bAWDQT388MO64oor9MYbb2jmzJlWjxcWO3fu1AMPPKBZs2ZZPUpYzJw5Uy+++GLo8YgR5v9f69///nfNmTNH3//+9/Xss89qwoQJevfdd3X66adbPdqg2rlzp/z+r7+Pb+/evbr00kuH5Cd4T8add96ptWvX6uGHH9bMmTP1+uuvy+l0Ki4uTj/72c+sHu845v8bGAHmz5+v+fPnWz1G2NXV1fV4vHHjRiUkJKipqUkXXXSRRVMNvssvv7zH4xUrVmjt2rV67bXXhkXcfPzxx1q4cKHWr1+vO+4YHl+MM2LEiD6/AsZUd955p1JSUvTQQw+F9n3nO9+xcKLwmDBhQo/Hq1at0tSpUzV37lyLJgqPV199VVdccYUWLFgg6csrWL/5zW/U2Nho8WS9420phM1X38g+fvx4iycJH7/fr82bN6uzs7PPrwgxzeLFi7VgwQLl5+dbPUrYvPvuu5o0aZKmTJmihQsXqrm52eqRBt3TTz+t7OxsXXnllUpISND555+v9evXWz1WWHV3d+vRRx/VddddF/bvKgy32bNny+v16p133pEk/elPf9KOHTsi9i/uXLlBWAQCAd14442aM2fOsPht0nv27FFeXp4+++wzjR49Wk888YTS09OtHmvQbd68Wbt27dLOnTutHiVscnNztXHjRk2fPl2HDx/W8uXLdeGFF2rv3r0aM2aM1eMNmr/97W9au3at3G63brnlFu3cuVM/+9nPFB0drZKSEqvHC4snn3xSH330kX784x9bPcqgW7p0qTo6OpSWlia73S6/368VK1Zo4cKFVo/WK+IGYbF48WLt3btXO3bssHqUsJg+fbp2796t9vZ2Pf744yopKdH27duNDpyWlhaVlZXphRdeUExMjNXjhM0//s111qxZys3N1eTJk/XYY49p0aJFFk42uAKBgLKzs7Vy5UpJ0vnnn6+9e/eqpqZm2MTNhg0bNH/+/H59S/VQ99hjj+nXv/61Nm3apJkzZ2r37t268cYbNWnSpIj8503cYNC5XC797//+r1555RWdeeaZVo8TFtHR0Zo2bZokKSsrSzt37tTq1av1wAMPWDzZ4GlqalJbW5u++93vhvb5/X698soruv/++9XV1SW73W7hhOExbtw4nXPOOdq/f7/VowyqiRMnHhfrM2bM0P/8z/9YNFF4HTx4UC+++KK2bt1q9ShhcfPNN2vp0qW66qqrJEnnnXeeDh48KI/HQ9xgeAkGg7rhhhv0xBNPqL6+fljcbNiXQCCgrq4uq8cYVJdccon27NnTY5/T6VRaWpqWLFkyLMJG+vKG6r/+9a+69tprrR5lUM2ZM+e4X+3wzjvvaPLkyRZNFF4PPfSQEhISQjfYmu6TTz7p8SXWkmS32xUIBCya6MSImzD4+OOPe/wt7r333tPu3bs1fvx4nXXWWRZONrgWL16sTZs26amnntKYMWPk8/kkSXFxcRo1apTF0w2e8vJyzZ8/X2eddZaOHTumTZs2qb6+Xs8995zVow2qMWPGHHc/1WmnnaYzzjjD6PusbrrpJl1++eWaPHmyDh06pMrKStntdl199dVWjzaofv7zn2v27NlauXKl/vM//1ONjY1at26d1q1bZ/Vogy4QCOihhx5SSUnJsPjYv/Tlp0BXrFihs846SzNnztQbb7yhqqoqXXfddVaP1rsgBt3LL78clHTcVlJSYvVog6q3c5YUfOihh6webVBdd911wcmTJwejo6ODEyZMCF5yySXB559/3uqxLDF37txgWVmZ1WMMqqKiouDEiROD0dHRweTk5GBRUVFw
//79Vo8VFr/73e+C5557btDhcATT0tKC69ats3qksHjuueeCkoL79u2zepSw6ejoCJaVlQXPOuusYExMTHDKlCnBZcuWBbu6uqwerVd8KzgAADAKv+cGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBsCwlpqaqurqaqvHADCAiBsAEWPevHm68cYbrR4DwBBH3AAAAKMQNwAiwo9//GNt375dq1evls1mk81m04EDB7R9+3bl5OTI4XBo4sSJWrp0qb744ovQcfPmzZPL5ZLL5VJcXJzi4+P1y1/+Uqf6tXkPPvigxo0bJ6/XO1CnBiDMiBsAEWH16tXKy8tTaWmpDh8+rMOHD2vkyJG67LLLdMEFF+hPf/qT1q5dqw0bNuiOO+7ocezDDz+sESNGqLGxUatXr1ZVVZUefPDBk57hV7/6lZYuXarnn39el1xyyUCdGoAwG2H1AAAgSXFxcYqOjlZsbKySkpIkScuWLVNKSoruv/9+2Ww2paWl6dChQ1qyZIkqKioUFfXl389SUlJ07733ymazafr06dqzZ4/uvfdelZaW9vv1lyxZokceeUTbt2/XzJkzB+UcAYQHV24ARKy33npLeXl5stlsoX1z5szRxx9/rPfffz+073vf+16PNXl5eXr33Xfl9/v79Tr33HOP1q9frx07dhA2gAGIGwDD3oUXXii/36/HHnvM6lEADADiBkDEiI6O7nG1ZcaMGWpoaOhxc/Af/vAHjRkzRmeeeWZo3x//+MceP+e1117T2WefLbvd3q/XzcnJ0bPPPquVK1fq7rvv/pZnAcBqxA2AiJGamqo//vGPOnDggI4eParrr79eLS0tuuGGG/T222/rqaeeUmVlpdxud+h+G0lqbm6W2+3Wvn379Jvf/Eb33XefysrKTuq1Z8+erW3btmn58uX8Uj9giOOGYgAR46abblJJSYnS09P16aef6r333tO2bdt08803KyMjQ+PHj9eiRYv0i1/8osdxxcXF+vTTT5WTkyO73a6ysjL913/910m//r/8y7/omWee0WWXXSa73a4bbrhhoE4NQBjZgqf6yyAAIALMmzdPmZmZXG0BEMLbUgAAwCjEDQBj/f73v9fo0aP73ACYibelABjr008/1QcffNDn89OmTQvjNADChbgBAABG4W0pAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFH+P1hqOmKTb9k9AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "\n",
    "sns.barplot(x='top_k', y='hit', data=hit_stat_df, errorbar=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7925564a-7d30-4914-baaf-4a00abb7686d",
   "metadata": {
    "papermill": {
     "duration": 0.109216,
     "end_time": "2024-11-23T14:35:26.464009",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.354793",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 生成答案"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "27132c3b-0051-4df6-bf57-fd804acb8d17",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:52.898987Z",
     "iopub.status.busy": "2024-12-04T14:09:52.898643Z",
     "iopub.status.idle": "2024-12-04T14:09:52.973426Z",
     "shell.execute_reply": "2024-12-04T14:09:52.972960Z",
     "shell.execute_reply.started": "2024-12-04T14:09:52.898972Z"
    },
    "papermill": {
     "duration": 0.199165,
     "end_time": "2024-11-23T14:35:27.323500",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.124335",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1242952/3342461511.py:3: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
      "  ollama_llm = Ollama(\n"
     ]
    }
   ],
   "source": [
    "from langchain.llms import Ollama\n",
    "\n",
    "ollama_llm = Ollama(\n",
    "    model='qwen2:7b-instruct',\n",
    "    base_url='http://localhost:11434',\n",
    "    top_k=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "50404beb-3be0-4aaa-b124-8c7a52b84531",
   "metadata": {
    "editable": true,
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:52.974166Z",
     "iopub.status.busy": "2024-12-04T14:09:52.973909Z",
     "iopub.status.idle": "2024-12-04T14:09:52.977747Z",
     "shell.execute_reply": "2024-12-04T14:09:52.977414Z",
     "shell.execute_reply.started": "2024-12-04T14:09:52.974153Z"
    },
    "papermill": {
     "duration": 0.159318,
     "end_time": "2024-11-23T14:35:26.768506",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.609188",
     "status": "completed"
    },
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "def rag(vector_db, llm, query, n_chunks=4):\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个金融分析师，擅长根据所获取的信息片段，对问题进行分析和推理。\n",
    "你的任务是根据所获取的信息片段（<<<<context>>><<<</context>>>之间的内容）回答问题。\n",
    "回答保持简洁，不必重复问题，不要添加描述性解释和与答案无关的任何内容。\n",
    "已知信息：\n",
    "<<<<context>>>\n",
    "{{knowledge}}\n",
    "<<<</context>>>\n",
    "\n",
    "问题：{{query}}\n",
    "请回答：\n",
    "\"\"\".strip()\n",
    "    chunks = vector_db.similarity_search(query, k=n_chunks)\n",
    "    prompt = prompt_tmpl.replace('{{knowledge}}', '\\n\\n'.join([doc.page_content for doc in chunks])).replace('{{query}}', query)\n",
    "    retry_count = 3\n",
    "\n",
    "    resp = ''\n",
    "    while retry_count > 0:\n",
    "        try:\n",
    "            resp = llm.invoke(prompt)\n",
    "            break\n",
    "        except Exception as e:\n",
    "            retry_count -= 1\n",
    "            sleeping_seconds = 2 ** (4 - retry_count)\n",
    "            print(f\"query={query}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "            \n",
    "            time.sleep(sleeping_seconds)\n",
    "    \n",
    "    return resp, chunks"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95e5a804-2dc6-411c-ba71-6ccf765b2b73",
   "metadata": {
    "papermill": {
     "duration": 0.135973,
     "end_time": "2024-11-23T14:35:27.001401",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.865428",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "166392d8-f801-4372-b8ad-3e79aef0b350",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:52.978276Z",
     "iopub.status.busy": "2024-12-04T14:09:52.978154Z",
     "iopub.status.idle": "2024-12-04T14:09:52.985241Z",
     "shell.execute_reply": "2024-12-04T14:09:52.984857Z",
     "shell.execute_reply.started": "2024-12-04T14:09:52.978264Z"
    },
    "papermill": {
     "duration": 0.141864,
     "end_time": "2024-11-23T14:35:27.564409",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.422545",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "prediction_df = qa_df[qa_df['dataset'] == 'test'][['uuid', 'question', 'qa_type', 'answer']].rename(columns={'answer': 'ref_answer'})\n",
    "\n",
    "def predict(vector_db, llm, prediction_df, n_chunks):\n",
    "    prediction_df = prediction_df.copy()\n",
    "    answer_dict = {}\n",
    "\n",
    "    for idx, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        uuid = row['uuid']\n",
    "        question = row['question']\n",
    "        answer, chunks = rag(vector_db, llm, question, n_chunks=n_chunks)\n",
    "        assert len(chunks) <= n_chunks\n",
    "        answer_dict[question] = {\n",
    "            'uuid': uuid,\n",
    "            'ref_answer': row['ref_answer'],\n",
    "            'gen_answer': answer,\n",
    "            'chunks': chunks\n",
    "        }\n",
    "\n",
    "    prediction_df.loc[:, 'gen_answer'] = prediction_df['question'].apply(lambda q: answer_dict[q]['gen_answer'])\n",
    "    prediction_df.loc[:, 'chunks'] = prediction_df['question'].apply(lambda q: answer_dict[q]['chunks'])\n",
    "\n",
    "    return prediction_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "ca46d5f1-e698-457d-abb6-92d83cd59c66",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:09:52.985734Z",
     "iopub.status.busy": "2024-12-04T14:09:52.985611Z",
     "iopub.status.idle": "2024-12-04T14:12:30.567162Z",
     "shell.execute_reply": "2024-12-04T14:12:30.566609Z",
     "shell.execute_reply.started": "2024-12-04T14:09:52.985722Z"
    },
    "papermill": {
     "duration": 514.92352,
     "end_time": "2024-11-23T14:44:02.805529",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.882009",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3ff5185a1e4b49ebb483ad9aa0c5e7d0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pred_df = predict(md_vector_db, ollama_llm, prediction_df, n_chunks=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "f7026bac-9927-4a33-85c0-bc1b35f3a603",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:12:30.567777Z",
     "iopub.status.busy": "2024-12-04T14:12:30.567619Z",
     "iopub.status.idle": "2024-12-04T14:12:30.742625Z",
     "shell.execute_reply": "2024-12-04T14:12:30.740254Z",
     "shell.execute_reply.started": "2024-12-04T14:12:30.567762Z"
    }
   },
   "outputs": [],
   "source": [
    "save_path = os.path.join(expr_dir, 'predictions.xlsx')\n",
    "pred_df.to_excel(save_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d79e974-089f-4c08-ba5e-804f6542e06a",
   "metadata": {
    "papermill": {
     "duration": 0.14423,
     "end_time": "2024-11-23T14:44:03.513124",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.368894",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "217568fe-c0e4-49eb-9a7c-9fdfbc033d8a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:12:30.745910Z",
     "iopub.status.busy": "2024-12-04T14:12:30.745194Z",
     "iopub.status.idle": "2024-12-04T14:12:31.000394Z",
     "shell.execute_reply": "2024-12-04T14:12:30.999945Z",
     "shell.execute_reply.started": "2024-12-04T14:12:30.745842Z"
    },
    "papermill": {
     "duration": 0.369729,
     "end_time": "2024-11-23T14:44:04.017198",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.647469",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "import time\n",
    "\n",
    "judge_llm = ChatOpenAI(\n",
    "    api_key=os.environ['LLM_API_KEY'],\n",
    "    base_url=os.environ['LLM_BASE_URL'],\n",
    "    model_name='qwen2-72b-instruct',\n",
    "    temperature=0\n",
    ")\n",
    "\n",
    "def evaluate(prediction_df):\n",
    "    \"\"\"\n",
    "    对预测结果进行打分\n",
    "    :param prediction_df: 预测结果，需要包含问题，参考答案，生成的答案，列名分别为question, ref_answer, gen_answer\n",
    "    :return 打分模型原始返回结果\n",
    "    \"\"\"\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个经济学博士，现在我有一系列问题，有一个助手已经对这些问题进行了回答，你需要参照参考答案，评价这个助手的回答是否正确，仅回复“是”或“否”即可，不要带其他描述性内容或无关信息。\n",
    "问题：\n",
    "<question>\n",
    "{{question}}\n",
    "</question>\n",
    "\n",
    "参考答案：\n",
    "<ref_answer>\n",
    "{{ref_answer}}\n",
    "</ref_answer>\n",
    "\n",
    "助手回答：\n",
    "<gen_answer>\n",
    "{{gen_answer}}\n",
    "</gen_answer>\n",
    "请评价：\n",
    "    \"\"\"\n",
    "    results = []\n",
    "\n",
    "    for _, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        question = row['question']\n",
    "        ref_answer = row['ref_answer']\n",
    "        gen_answer = row['gen_answer']\n",
    "\n",
    "        prompt = prompt_tmpl.replace('{{question}}', question).replace('{{ref_answer}}', str(ref_answer)).replace('{{gen_answer}}', gen_answer).strip()\n",
    "        \n",
    "        retry_count = 3\n",
    "        result = ''\n",
    "        \n",
    "        while retry_count > 0:\n",
    "            try:\n",
    "                result = judge_llm.invoke(prompt).content\n",
    "                break\n",
    "            except Exception as e:\n",
    "                retry_count -= 1\n",
    "                sleeping_seconds = 2 ** (4 - retry_count)\n",
    "                print(f\"query={question}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "                \n",
    "                time.sleep(sleeping_seconds)\n",
    "        \n",
    "        results.append(result)\n",
    "\n",
    "        time.sleep(1)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "71db81af-b8f9-47ba-958b-761896516605",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:12:31.001152Z",
     "iopub.status.busy": "2024-12-04T14:12:31.000974Z",
     "iopub.status.idle": "2024-12-04T14:15:06.204249Z",
     "shell.execute_reply": "2024-12-04T14:15:06.201803Z",
     "shell.execute_reply.started": "2024-12-04T14:12:31.001139Z"
    },
    "papermill": {
     "duration": 150.566109,
     "end_time": "2024-11-23T14:46:34.714324",
     "exception": false,
     "start_time": "2024-11-23T14:44:04.148215",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2efff7a3bbe74b789c8fb611c40ff20f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pred_df['raw_score'] = evaluate(pred_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "7da1b98e-99aa-4e11-9297-91eac1c62493",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:15:06.207425Z",
     "iopub.status.busy": "2024-12-04T14:15:06.206645Z",
     "iopub.status.idle": "2024-12-04T14:15:06.221348Z",
     "shell.execute_reply": "2024-12-04T14:15:06.219068Z",
     "shell.execute_reply.started": "2024-12-04T14:15:06.207354Z"
    },
    "papermill": {
     "duration": 0.138037,
     "end_time": "2024-11-23T14:46:35.040595",
     "exception": false,
     "start_time": "2024-11-23T14:46:34.902558",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['是', '否'], dtype=object)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: list the distinct raw verdicts before they are converted to 0/1 scores.\n",
    "pred_df['raw_score'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "2c99c078-d294-40b8-b57b-31cfd7349c3e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:15:06.224520Z",
     "iopub.status.busy": "2024-12-04T14:15:06.223784Z",
     "iopub.status.idle": "2024-12-04T14:15:06.240909Z",
     "shell.execute_reply": "2024-12-04T14:15:06.239271Z",
     "shell.execute_reply.started": "2024-12-04T14:15:06.224451Z"
    },
    "papermill": {
     "duration": 0.107466,
     "end_time": "2024-11-23T14:46:35.243603",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.136137",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "pred_df['score'] = (pred_df['raw_score'] == '是').astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "423897f2-786e-415b-a613-55a4359faf76",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:15:06.243081Z",
     "iopub.status.busy": "2024-12-04T14:15:06.242529Z",
     "iopub.status.idle": "2024-12-04T14:15:06.253726Z",
     "shell.execute_reply": "2024-12-04T14:15:06.252126Z",
     "shell.execute_reply.started": "2024-12-04T14:15:06.243030Z"
    },
    "papermill": {
     "duration": 0.094328,
     "end_time": "2024-11-23T14:46:35.431162",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.336834",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.71"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Overall accuracy: 'score' is 0/1, so its mean is the fraction of rows judged correct.\n",
    "pred_df['score'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "79325429-9cf1-4e2c-95ac-cb0c1a3b6156",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:15:06.255933Z",
     "iopub.status.busy": "2024-12-04T14:15:06.255411Z",
     "iopub.status.idle": "2024-12-04T14:15:06.394885Z",
     "shell.execute_reply": "2024-12-04T14:15:06.394270Z",
     "shell.execute_reply.started": "2024-12-04T14:15:06.255883Z"
    },
    "papermill": {
     "duration": 0.289336,
     "end_time": "2024-11-23T14:46:35.804651",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.515315",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Write the scored predictions to eval_df.xlsx inside expr_dir (row index omitted).\n",
    "save_path = os.path.join(expr_dir, 'eval_df.xlsx')\n",
    "pred_df.to_excel(save_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e9264087-cea4-4131-98eb-7875b0cbaddf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T08:01:27.295186Z",
     "iopub.status.busy": "2024-12-04T08:01:27.294401Z",
     "iopub.status.idle": "2024-12-04T08:01:27.302698Z",
     "shell.execute_reply": "2024-12-04T08:01:27.301299Z",
     "shell.execute_reply.started": "2024-12-04T08:01:27.295113Z"
    }
   },
   "source": [
    "# 结果分析"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9e897c3a-e4d6-414f-96e2-06fee47e0960",
   "metadata": {},
   "source": [
    "从打分结果看，加入标题后，结果又有了大幅度提升"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "88fc7227-9c21-48da-b179-5070406eb113",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:15:06.395780Z",
     "iopub.status.busy": "2024-12-04T14:15:06.395538Z",
     "iopub.status.idle": "2024-12-04T14:15:06.400326Z",
     "shell.execute_reply": "2024-12-04T14:15:06.399846Z",
     "shell.execute_reply.started": "2024-12-04T14:15:06.395763Z"
    },
    "papermill": {
     "duration": 0.088622,
     "end_time": "2024-11-23T14:46:36.016801",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.928179",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "pred_df['avg_chunk_len'] = pred_df['chunks'].apply(lambda chunks: sum([len(d.page_content) for d in chunks]) / len(chunks))\n",
    "pred_df['max_chunk_len'] = pred_df['chunks'].apply(lambda chunks: max([len(d.page_content) for d in chunks]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "fb1a3e72-cb28-419a-8f6c-a0ef5d34c67a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:15:06.404226Z",
     "iopub.status.busy": "2024-12-04T14:15:06.403845Z",
     "iopub.status.idle": "2024-12-04T14:15:06.407247Z",
     "shell.execute_reply": "2024-12-04T14:15:06.406708Z",
     "shell.execute_reply.started": "2024-12-04T14:15:06.404205Z"
    }
   },
   "outputs": [],
   "source": [
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "ff2413c8-f7a7-4e70-9178-c42ff2427426",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:15:06.408229Z",
     "iopub.status.busy": "2024-12-04T14:15:06.407993Z",
     "iopub.status.idle": "2024-12-04T14:15:06.496492Z",
     "shell.execute_reply": "2024-12-04T14:15:06.496112Z",
     "shell.execute_reply.started": "2024-12-04T14:15:06.408207Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='score', ylabel='avg_chunk_len'>"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAqoklEQVR4nO3de3SU9YH/8c/kOiFhgtwSkCSgVCESpALCqGULpImRIpdoLURMLbvupgGRoC2coiBe4rKVW0XFloLnVExLW3oBosTIpZCAiIIUlALiJhgmYUFyYZ0kJPP7w2V+TgFNJ5M8wzfv1zlzTub7PDPzeaRpPvN9bjaPx+MRAACAoUKsDgAAANCWKDsAAMBolB0AAGA0yg4AADAaZQcAABiNsgMAAIxG2QEAAEYLszpAMGhublZFRYU6d+4sm81mdRwAANACHo9HtbW16t27t0JCrjx/Q9mRVFFRoYSEBKtjAAAAP5SXl6tPnz5XXE7ZkdS5c2dJX/zHcjgcFqcBAAAtUVNTo4SEBO/f8Suh7EjeXVcOh4OyAwDAVebrDkHhAGUAAGA0yg4AADAaZQcAABiNsgMAAIxG2QEAAEaj7AAAAKNRdgAAgNEoOwAAwGiUHQAAYDTKDgAAMBplBwAAGI2yAwAAjMaNQNEiHo9Hbrfb6hit5vF4VF9fL0mKjIz82pvHBTu73X7VbwMAtDXKDlrE7XYrIyPD6hj4B4WFhYqKirI6BgAENXZjAQAAozGzgxax2+0qLCy0Okarud1uTZo0SZK0YcMG2e12ixO1ztWeH8GB3dTBid3UgUPZQYvYbDbjdpfY7XbjtgnwB7upgxO7qQOH3VgAAMBols7sLFy4UE8++aTP2I033qiPPvpI0hffNubMmaOCggLV19crPT1dL774ouLi4rzrl5WVKScnR1u3blVMTIyys7OVn5+vsDAmrQCgJdhNHZyu9vzBxPJGcNNNN+mtt97yPv9ySZk9e7Y2bdqk9evXKzY2VjNmzNDkyZO1a9cuSVJTU5PGjRun+Ph4lZSU6NSpU3rggQcUHh6uZ599tt23BQCuRuymhuksLzthYWGKj4+/ZLy6ulqrV6/WunXrNGbMGEnSmjVrNHDgQO3evVsjR47Uli1bdPjwYb311luKi4vTkCFD9NRTT+knP/mJFi5cqIiIiPbeHAAAEGQsP2bn6NGj6t27t6677jplZWWprKxMkrRv3z41NjYqNTXVu+6AAQOUmJio0tJSSVJpaalSUlJ8dmulp6erpqZGhw4duuJn1tfXq6amxucBAADMZGnZGTFihNauXas33nhDL730kk6cOKFvfetbqq2tlcvlUkREhLp06eLzmri4OLlcLkmSy+XyKToXl19cdiX5+fmKjY31PhISEgK7YQAAIGhYuhvry6c6Dh48WCNGjFBSUpJ++9vftum+1nnz5ikvL8/7vKamhsIDAIChLN+N9WVdunTRDTfcoGPHjik+Pl4NDQ06d+6czzqVlZXeY3zi4+NVWVl5yfKLy64kMjJSDofD5wEAAMwUVGWnrq5Ox48fV69evTR06FCFh4eruLjYu/zIkSMqKyuT0+mUJDmdTh08eFBVVVXedYqKiuRwOJScnNzu+QEAQPCxdDfWo48+qvHjxyspKUkVFRVasGCBQkNDNWXKFMXGxmr69OnKy8tT165d5XA4NHPmTDmdTo0cOVKSlJaWpuTkZE2bNk2LFy+Wy+XS/PnzlZubq8jISCs3DQAABAlLy87Jkyc1ZcoUnTlzRj169NAdd9yh3bt3q0ePHpKkpUuXKiQkRJmZmT4XFbwoNDRUGzduVE5OjpxOp6Kjo5Wdna1FixZZtUkAACDIWFp2CgoKvnK53W7XypUrtXLlyiuuk5SUpM2bNwc6GgAAMERQHbMDAAAQaJQdAABgNMoOAAAwGmUHAAAYjbIDAACMRtkBAABGo+wAAACjUXYAAIDRKDsAAMBolB0AAGA0yg4A
ADAaZQcAABiNsgMAAIxG2QEAAEaj7AAAAKNRdgAAgNEoOwAAwGiUHQAAYDTKDgAAMBplBwAAGI2yAwAAjEbZAQAARqPsAAAAo1F2AACA0Sg7AADAaJQdAABgNMoOAAAwGmUHAAAYjbIDAACMRtkBAABGo+wAAACjUXYAAIDRKDsAAMBolB0AAGA0yg4AADAaZQcAABiNsgMAAIxG2QEAAEaj7AAAAKNRdgAAgNEoOwAAwGiUHQAAYDTKDgAAMBplBwAAGI2yAwAAjEbZAQAARqPsAAAAo1F2AACA0Sg7AADAaJQdAABgNMoOAAAwGmUHAAAYjbIDAACMRtkBAABGo+wAAACjUXYAAIDRKDsAAMBolB0AAGA0yg4AADAaZQcAABiNsgMAAIwWZnWAjsDj8cjtdlsdA5LPvwP/JsHDbrfLZrNZHQOAoSg77cDtdisjI8PqGPgHkyZNsjoC/k9hYaGioqKsjgHAUOzGAgAARmNmp53VDZkiTwj/2S3j8UjNF774OSRMYteJZWzNFxSz/3WrYwDoAPir2848IWFSaLjVMTq4CKsDQJLH6gAAOoygKjvPPfec5s2bp1mzZmnZsmWSvjjeZc6cOSooKFB9fb3S09P14osvKi4uzvu6srIy5eTkaOvWrYqJiVF2drby8/MVFhZUmwfAQJyAEDw4ASE4BcMJCEHTBvbu3atVq1Zp8ODBPuOzZ8/Wpk2btH79esXGxmrGjBmaPHmydu3aJUlqamrSuHHjFB8fr5KSEp06dUoPPPCAwsPD9eyzz1qxKQA6EE5ACE6cgBA8guEEhKA4QLmurk5ZWVn6xS9+oWuuucY7Xl1drdWrV2vJkiUaM2aMhg4dqjVr1qikpES7d++WJG3ZskWHDx/Wr3/9aw0ZMkQZGRl66qmntHLlSjU0NFi1SQAAIEgExcxObm6uxo0bp9TUVD399NPe8X379qmxsVGpqanesQEDBigxMVGlpaUaOXKkSktLlZKS4rNbKz09XTk5OTp06JC++c1vXvJ59fX1qq+v9z6vqalpoy0D0JG8cMdZRYZyNJJVPB6pofmLnyNCOP/ASvVNNs3Y2dXqGF6Wl52CggK999572rt37yXLXC6XIiIi1KVLF5/xuLg4uVwu7zpfLjoXl19cdjn5+fl68sknA5AeAP6/yFCPIkOtTtGx2a0OgP8TXKXf0t1Y5eXlmjVrll577TXZ7e33P9F58+apurra+ygvL2+3zwYAAO3L0rKzb98+VVVV6ZZbblFYWJjCwsK0fft2rVixQmFhYYqLi1NDQ4POnTvn87rKykrFx8dLkuLj41VZWXnJ8ovLLicyMlIOh8PnAQAAzGRp2Rk7dqwOHjyo/fv3ex/Dhg1TVlaW9+fw8HAVFxd7X3PkyBGVlZXJ6XRKkpxOpw4ePKiqqirvOkVFRXI4HEpOTm73bQIAAMHF0mN2OnfurEGDBvmMRUdHq1u3bt7x6dOnKy8vT127dpXD4dDMmTPldDo1cuRISVJaWpqSk5M1bdo0LV68WC6XS/Pnz1dubq4iIyPbfZsAAEBwsfwA5a+zdOlShYSEKDMz0+eigheFhoZq48aNysnJkdPpVHR0tLKzs7Vo0SILUwMAgGARdGVn27ZtPs/tdrtWrlyplStXXvE1SUlJ2rx5cxsnAwAAV6OguKggAABAW6HsAAAAo1F2AACA0Sg7AADAaJQdAABgNMoOAAAwGmUHAAAYjbIDAACMRtkBAABGo+wAAACjUXYAAIDRKDsAAMBolB0AAGA0yg4AADAaZQcAABiNsgMAAIxG2QEAAEaj7AAAAKNRdgAAgNHCrA4AAKaob7I6ARAcgu13gbIDAAEyY2c3qyMAuAx2YwEAAKMxswMAAfLCHWcUGWp1CsB69U3BNdNJ2QGAAIkMFWUHCELsxgIAAEaj7AAAAKNRdgAAgNEoOwAAwGh+HaDc1NSktWvXqri4WFVVVWpubvZZ/vbbbwckHAAAQGv5VXZmzZqltWvX
aty4cRo0aJBsNlugcwEAAASEX2WnoKBAv/3tb3XXXXcFOg8AAEBA+XXMTkREhPr37x/oLAAAAAHnV9mZM2eOli9fLo/HE+g8AAAAAeXXbqydO3dq69atKiws1E033aTw8HCf5X/4wx8CEg4AAKC1/Co7Xbp00aRJkwKdBQAAIOD8Kjtr1qwJdA4AAIA24fdFBS9cuKC33npLq1atUm1trSSpoqJCdXV1AQsHAADQWn7N7Pz3f/+37rzzTpWVlam+vl7f+c531LlzZ/3nf/6n6uvr9fLLLwc6JwAAgF/8mtmZNWuWhg0bps8++0xRUVHe8UmTJqm4uDhg4QAAAFrLr5mdv/71ryopKVFERITPeN++ffXpp58GJBgAAEAg+DWz09zcrKampkvGT548qc6dO7c6FAAAQKD4NbOTlpamZcuW6ZVXXpEk2Ww21dXVacGCBdxC4us0NVqdAAgO/C4AaCd+lZ3nn39e6enpSk5Oltvt1tSpU3X06FF1795dr7/+eqAzGqXzgQKrIwAA0KH4VXb69OmjAwcOqKCgQB988IHq6uo0ffp0ZWVl+RywDAAAYDW/yo4khYWF6f777w9klg6h9ubvS6HhX78iYLqmRmY6AbSLFpedP//5zy1+07vvvtuvMB1CaDhlBwCAdtTisjNx4sQWrWez2S57phYAAIAVWlx2mpub2zIHAABAm/D73lgtkZKSovLy8rb8CAAAgK/UpmXnk08+UWMj19IAAADWadOyAwAAYDXKDgAAMBplBwAAGI2yAwAAjEbZAQAARvOr7Jw8efKKy3bv3u39edWqVYqLi/PnIwAAAALCr7KTlpams2fPXjK+a9cu3Xnnnd7nU6dOVXR0tP/pAAAAWsmvsjNy5EilpaWptrbWO7Zjxw7dddddWrBgQcDCAQAAtJZfZeeXv/ylEhMTNX78eNXX12vr1q0aN26cFi1apNmzZwc6IwAAgN/8KjshISEqKChQeHi4xowZo7vvvlv5+fmaNWtWoPMBAAC0SotvBPrBBx9cMrZw4UJNmTJF999/v0aNGuVdZ/DgwYFLCAAA0AotLjtDhgyRzWaTx+Pxjl18vmrVKr3yyivyeDyy2Wxqampqk7AAAAD/rBaXnRMnTrRlDgAAgDbR4rKTlJTUljkAAADaRIvLzj86evSotm7dqqqqKjU3N/sse+KJJ1odDAAAIBD8Kju/+MUvlJOTo+7duys+Pl42m827zGazUXYAAEDQ8OvU86efflrPPPOMXC6X9u/fr/fff9/7eO+991r8Pi+99JIGDx4sh8Mhh8Mhp9OpwsJC73K3263c3Fx169ZNMTExyszMVGVlpc97lJWVady4cerUqZN69uypxx57TBcuXPBnswAAgIH8KjufffaZ7r333lZ/eJ8+ffTcc89p3759evfddzVmzBhNmDBBhw4dkiTNnj1bf/nLX7R+/Xpt375dFRUVmjx5svf1TU1NGjdunBoaGlRSUqJXX31Va9euZWYJAAB4+VV27r33Xm3ZsqXVHz5+/Hjddddd+sY3vqEbbrhBzzzzjGJiYrR7925VV1dr9erVWrJkicaMGaOhQ4dqzZo1Kikp8d5sdMuWLTp8+LB+/etfa8iQIcrIyNBTTz2llStXqqGhodX5AADA1c+vY3b69++vxx9/XLt371ZKSorCw8N9lj/88MP/9Hs2NTVp/fr1On/+vJxOp/bt26fGxkalpqZ61xkwYIASExNVWlqqkSNHqrS0VCkpKT53Vk9PT1dOTo4OHTqkb37zm5f9rPr6etXX13uf19TU/NN5AQDA1cGvsvPKK68oJiZG27dv1/bt232W2Wy2f6rsHDx4UE6nU263WzExMdqwYYOSk5O1f/9+RUREqEuXLj7rx8XFyeVySZJcLpdP0bm4/OKyK8nPz9eTTz7Z4owAAODq5VfZCeQFBm+88Ubt379f1dXV+t3vfqfs7OxLClSgzZs3T3l5ed7nNTU1SkhIaNPPvMjWfEGer18N
bcXjkZr/7wD2kDDpS2cSon3ZmjmRAED78Ps6O4ESERGh/v37S5KGDh2qvXv3avny5brvvvvU0NCgc+fO+czuVFZWKj4+XpIUHx+vd955x+f9Lp6tdXGdy4mMjFRkZGSAt6RlYva/bsnnAgDQUflVdn74wx9+5fJf/epXfoWRpObmZtXX12vo0KEKDw9XcXGxMjMzJUlHjhxRWVmZnE6nJMnpdOqZZ55RVVWVevbsKUkqKiqSw+FQcnKy3xkAwB/1TTaJuVvLeDxSw/9d4zYihIlbK33xuxA8/Co7n332mc/zxsZG/e1vf9O5c+c0ZsyYFr/PvHnzlJGRocTERNXW1mrdunXatm2b3nzzTcXGxmr69OnKy8tT165d5XA4NHPmTDmdTo0cOVKSlJaWpuTkZE2bNk2LFy+Wy+XS/PnzlZuba9nMzeXY7Xaf6wfBOm63W5MmTZIkbdiwQXa73eJEkGTMv8OMnV2tjgDgMvwqOxs2bLhkrLm5WTk5Obr++utb/D5VVVV64IEHdOrUKcXGxmrw4MF688039Z3vfEeStHTpUoWEhCgzM1P19fVKT0/Xiy++6H19aGioNm7cqJycHDmdTkVHRys7O1uLFi3yZ7PajM1mU1RUlNUx8A/sdjv/LgDQAdg8Hk/A5lyPHDmib3/72zp16lSg3rJd1NTUKDY2VtXV1XI4HFbHQRv6/PPPlZGRIUkqLCyk7KDVPB6P3G631TEgZm6Dld1u97mtVCC19O93QA9QPn78OLdqANChMHMbnJi5xZf5VXa+fNq29MU3m1OnTmnTpk3Kzs4OSDAAAIBA8KvsvP/++z7PQ0JC1KNHDz3//PNfe6YWAABAe/Kr7GzdujXQOQAAANqEXzcCBQAAuFr4VXYqKys1bdo09e7dW2FhYQoNDfV5AAAABAu/dmP94Ac/UFlZmR5//HH16tWrzU4pAwAAaC2/ys7OnTv117/+VUOGDAlwHAAAgMDyazdWQkKCAngtQgAAgDbjV9lZtmyZ5s6dq08++STAcQAAAAKrxbuxrrnmGp9jc86fP6/rr79enTp1Unh4uM+6Z8+eDVxCAACAVmhx2Vm2bFkbxgAAAGgbLS473AYCAABcjfw6Zmfz5s168803LxnfsmWLCgsLWx0KAAAgUPwqO3PnzlVTU9Ml483NzZo7d26rQwEAAASKX2Xn6NGjSk5OvmR8wIABOnbsWKtDAQAABIpfZSc2NlYff/zxJePHjh1TdHR0q0MBAAAEil9lZ8KECXrkkUd0/Phx79ixY8c0Z84c3X333QELBwAA0Fp+lZ3FixcrOjpaAwYMUL9+/dSvXz8NHDhQ3bp1089+9rNAZwQAAPCbX/fGio2NVUlJiYqKinTgwAFFRUVp8ODBGjVqVKDzAQAAtIpfZUeSbDab0tLSlJaWdsV1UlJStHnzZiUkJPj7MQAAAK3i126slvrkk0/U2NjYlh8BAADwldq07AAAAFiNsgMAAIxG2QEAAEaj7AAAAKNRdgAAgNHatOysWrVKcXFxbfkRAAAAX8mv6+ysWLHisuM2m012u139+/fXqFGjNHXq1FaFAwAAaC2/ys7SpUt1+vRp/e///q+uueYaSdJnn32mTp06KSYmRlVVVbruuuu0detWLigIAAAs5ddurGeffVbDhw/X0aNHdebMGZ05c0Z///vfNWLECC1fvlxlZWWKj4/X7NmzA50XAADgn+LXzM78+fP1+9//Xtdff713rH///vrZz36mzMxMffzxx1q8eLEyMzMDFhQAAMAffs3snDp1ShcuXLhk/MKFC3K5XJKk3r17q7a2tnXpAAAAWsmvsjN69Gj9+7//u95//33v2Pvvv6+cnByNGTNGknTw4EH169cvMCkBAAD85NdurNWrV2vatGkaOnSowsPDJX0xqzN27FitXr1akhQTE6Pnn38+cElhKY/HI7fbbXWMVvvyNpiwPXa7XTabzeoYABDU/Co78fHxKioq0kcffaS///3vkqQb
b7xRN954o3ed0aNHByYhgoLb7VZGRobVMQJq0qRJVkdotcLCQkVFRVkdAwCCml9lZ+fOnbrjjjs0YMAADRgwINCZAAAAAsavsjNmzBhde+21mjJliu6//34lJycHOheCjN1uV2FhodUxWs3j8ai+vl6SFBkZedXvArLb7VZHAICg51fZqaioUEFBgV5//XU999xzGjx4sLKysjRlyhT16dMn0BkRBGw2mzG7Szp16mR1BABAO/LrbKzu3btrxowZ2rVrl44fP657771Xr776qvr27es9GwsAACAYtPpGoP369dPcuXP13HPPKSUlRdu3bw9ELgAAgIBoVdnZtWuXfvSjH6lXr16aOnWqBg0apE2bNgUqGwAAQKv5dczOvHnzVFBQoE8//VRpaWlavny5JkyYwLEQAAAg6PhVdnbs2KHHHntM3/ve99S9e/dAZwIAAAgYv8rOrl27JEmHDx/Wu+++q4aGBp/ld999d+uTAQAABIBfZefEiROaNGmSPvjgA9lsNnk8HknyXrOkqakpcAkBAABawa8DlB9++GH17dtXVVVV6tSpkw4dOqQdO3Zo2LBh2rZtW4AjAgAA+M+vmZ3S0lK9/fbb6t69u0JCQhQSEqI77rhD+fn5evjhh33uhg4AAGAlv2Z2mpqa1LlzZ0lfXGCwoqJCkpSUlKQjR44ELh0AAEAr+TWzM2jQIB04cED9+vXTiBEjtHjxYkVEROiVV17RddddF+iMAAAAfvOr7MyfP1/nz5+XJC1atEjf/e539a1vfUvdunXTb37zm4AGBAAAaA2/yk56err35/79++ujjz7S2bNndc0111z1d5EGAABm8avsXE7Xrl0D9VYAAAAB0+obgQIAAAQzyg4AADAaZQcAABiNsgMAAIxG2QEAAEaj7AAAAKNRdgAAgNEoOwAAwGiUHQAAYDTKDgAAMBplBwAAGI2yAwAAjEbZAQAARrO07OTn52v48OHq3LmzevbsqYkTJ+rIkSM+67jdbuXm5qpbt26KiYlRZmamKisrfdYpKyvTuHHj1KlTJ/Xs2VOPPfaYLly40J6bAgAAgpSlZWf79u3Kzc3V7t27VVRUpMbGRqWlpen8+fPedWbPnq2//OUvWr9+vbZv366KigpNnjzZu7ypqUnjxo1TQ0ODSkpK9Oqrr2rt2rV64oknrNgkAAAQZGwej8djdYiLTp8+rZ49e2r79u0aNWqUqqur1aNHD61bt0733HOPJOmjjz7SwIEDVVpaqpEjR6qwsFDf/e53VVFRobi4OEnSyy+/rJ/85Cc6ffq0IiIivvZza2pqFBsbq+rqajkcjjbdRgBA2/j888+VkZEhSSosLFRUVJTFidDWWvr3O6iO2amurpYkde3aVZK0b98+NTY2KjU11bvOgAEDlJiYqNLSUklSaWmpUlJSvEVHktLT01VTU6NDhw5d9nPq6+tVU1Pj8wAAAGYKmrLT3NysRx55RLfffrsGDRokSXK5XIqIiFCXLl181o2Li5PL5fKu8+Wic3H5xWWXk5+fr9jYWO8jISEhwFsDAACCRdCUndzcXP3tb39TQUFBm3/WvHnzVF1d7X2Ul5e3+WcCAABrhFkdQJJmzJihjRs3aseOHerTp493PD4+Xg0NDTp37pzP7E5lZaXi4+O967zzzjs+73fxbK2L6/yjyMhIRUZGBngrAABAMLJ0Zsfj8WjGjBnasGGD3n77bfXr189n+dChQxUeHq7i4mLv2JEjR1RWVian0ylJcjqdOnjwoKqqqrzrFBUVyeFwKDk5uX02BAAABC1LZ3Zyc3O1bt06/elPf1Lnzp29x9jExsYqKipKsbGxmj59uvLy8tS1a1c5HA7NnDlTTqdTI0eOlCSlpaUpOTlZ06ZN0+LFi+VyuTR//nzl5uYyewMAAKwtOy+99JIk6dvf/rbP+Jo1a/SDH/xAkrR06VKFhIQoMzNT9fX1Sk9P14svvuhdNzQ0VBs3blROTo6cTqeio6OVnZ2tRYsW
tddmAACAIBZU19mxCtfZAYCrH9fZ6XiuyuvsAAAABBplBwAAGI2yAwAAjEbZAQAARqPsAAAAo1F2AACA0Sg7AADAaJQdAABgNMoOAAAwGmUHAAAYjbIDAACMRtkBAABGo+wAAACjUXYAAIDRKDsAAMBolB0AAGA0yg4AADAaZQcAABiNsgMAAIxG2QEAAEaj7AAAAKNRdgAAgNEoOwAAwGiUHQAAYDTKDgAAMFqY1QEAANbyeDxyu91Wx2i1L2+DCdtjt9tls9msjmEEyg4AdHBut1sZGRlWxwioSZMmWR2h1QoLCxUVFWV1DCOwGwsAABiNmR0A6ODsdrsKCwutjtFqHo9H9fX1kqTIyMirfheQ3W63OoIxKDsA0MHZbDZjdpd06tTJ6ggIQuzGAgAARqPsAAAAo1F2AACA0Sg7AADAaJQdAABgNMoOAAAwGmUHAAAYjbIDAACMRtkBAABGo+wAAACjUXYAAIDRKDsAAMBolB0AAGA0yg4AADAaZQcAABiNsgMAAIxG2QEAAEaj7AAAAKNRdgAAgNEoOwAAwGiUHQCAMUpKSnTfffeppKTE6igIIpQdAIAR3G63lixZosrKSi1ZskRut9vqSAgSlB0AgBFee+01nTlzRpJ05swZrVu3zuJECBaUHQDAVe/kyZNat26dPB6PJMnj8WjdunU6efKkxckQDCg7AICrmsfj0fLly9Xc3Owz3tTUpOXLl3sLEDouyg4A4KpWVlamvXv3XlJqPB6P9u7dq7KyMouSIVhQdgAAV7WEhAQ5HI7LLnM4HEpISGjnRAg2lB0AwFWtvLxcNTU1l11WU1Oj8vLydk6EYEPZAQBc1RITEzV8+HDZbDafcZvNpltvvVWJiYkWJUOwoOwAAK5qNptNs2bNuqTshISEXHYcHQ9lBwBw1evTp4+ysrJ8xrKysnTttddalAjBhLIDADBCVlaWunfvLknq0aOHpk6danEiBAvKDgDACHa7XXl5eYqLi9Ps2bNlt9utjoQgEWZ1AAAAAuW2227TbbfdZnUMBBlmdgAAgNEoOwAAwGiWlp0dO3Zo/Pjx6t27t2w2m/74xz/6LPd4PHriiSfUq1cvRUVFKTU1VUePHvVZ5+zZs8rKypLD4VCXLl00ffp01dXVteNWAACAYGZp2Tl//rxuvvlmrVy58rLLFy9erBUrVujll1/Wnj17FB0drfT0dLndbu86WVlZOnTokIqKirRx40bt2LFDDz30UHttAgAACHI2T5DcDtZms2nDhg2aOHGipC9mdXr37q05c+bo0UcflSRVV1crLi5Oa9eu1fe//319+OGHSk5O1t69ezVs2DBJ0htvvKG77rpLJ0+eVO/evVv02TU1NYqNjVV1dfUV768CAACCS0v/fgftMTsnTpyQy+VSamqqdyw2NlYjRoxQaWmpJKm0tFRdunTxFh1JSk1NVUhIiPbs2XPF966vr1dNTY3PAwAAmCloy47L5ZIkxcXF+YzHxcV5l7lcLvXs2dNneVhYmLp27epd53Ly8/MVGxvrfXBHXAAAzBW0ZactzZs3T9XV1d4Hd8QFAMBcQVt24uPjJUmVlZU+45WVld5l8fHxqqqq8ll+4cIFnT171rvO5URGRsrhcPg8AACAmYK27PTr10/x8fEqLi72jtXU1GjPnj1yOp2SJKfTqXPnzmnfvn3edd5++201NzdrxIgR7Z4ZAAAEH0tvF1FXV6djx455n584cUL79+9X165dlZiYqEceeURPP/20vvGNb6hfv356/PHH1bt3b+8ZWwMHDtSdd96pf/u3f9PLL7+sxsZGzZgxQ9///vdbfCYWAAAwm6Vl591339Xo0aO9z/Py8iRJ2dnZWrt2rX784x/r/Pnzeuihh3Tu3DndcccdeuONN3xu7vbaa69pxowZGjt2rEJCQpSZmakVK1a0+7YAAIDgFDTX2bES19kBAODqc9VfZwcAACAQKDsAAMBolB0AAGA0yg4AADAaZQcAABiNsgMA
AIxG2QEAAEaj7KDDKSkp0X333aeSkhKrowAA2gFlBx2K2+3WkiVLVFlZqSVLlsjtdlsdCUAA8WUGl0PZQYfy2muv6cyZM5KkM2fOaN26dRYnAhAofJnBlVB20GGcPHlS69at08U7pHg8Hq1bt04nT560OBmAQODLDK6EsoMOwePxaPny5Vcc5xZxwNWNLzP4KpQddAhlZWXau3evmpqafMabmpq0d+9elZWVWZQMQGvxZQZfh7KDDiExMVHDhw9XaGioz3hoaKhuvfVWJSYmWpQMQGvxZQZfh7KDDsFms2nWrFlXHLfZbBakAhAIfJnB16HsoMPo06ePpk6d6i02NptNU6dO1bXXXmtxMgCtwZcZfB3KDjqUrKwsdevWTZLUvXt3TZ061eJEAAKBLzP4KpQddCh2u115eXmKi4vT7NmzZbfbrY4EIED4MoMrsXk4TF01NTWKjY1VdXW1HA6H1XEAAH4qKSnR8uXLNWvWLN12221Wx0Eba+nf77B2zAQAQJu67bbbKDm4BLuxAACA0Sg7AADAaJQdAABgNMoOAAAwGmUHAAAYjbIDAACMRtkBAABGo+wAAACjUXYAAIDRuIKypIt3zKipqbE4CQAAaKmLf7e/7s5XlB1JtbW1kqSEhASLkwAAgH9WbW2tYmNjr7icG4FKam5uVkVFhTp37iybzWZ1HLSxmpoaJSQkqLy8nBu/Aobh97tj8Xg8qq2tVe/evRUScuUjc5jZkRQSEqI+ffpYHQPtzOFw8H+GgKH4/e44vmpG5yIOUAYAAEaj7AAAAKNRdtDhREZGasGCBYqMjLQ6CoAA4/cbl8MBygAAwGjM7AAAAKNRdgAAgNEoOwAAwGiUHQAAYDTKDjqUlStXqm/fvrLb7RoxYoTeeecdqyMBCIAdO3Zo/Pjx6t27t2w2m/74xz9aHQlBhLKDDuM3v/mN8vLytGDBAr333nu6+eablZ6erqqqKqujAWil8+fP6+abb9bKlSutjoIgxKnn6DBGjBih4cOH64UXXpD0xT3REhISNHPmTM2dO9fidAACxWazacOGDZo4caLVURAkmNlBh9DQ0KB9+/YpNTXVOxYSEqLU1FSVlpZamAwA0NYoO+gQ/ud//kdNTU2Ki4vzGY+Li5PL5bIoFQCgPVB2AACA0Sg76BC6d++u0NBQVVZW+oxXVlYqPj7eolQAgPZA2UGHEBERoaFDh6q4uNg71tzcrOLiYjmdTguTAQDaWpjVAYD2kpeXp+zsbA0bNky33nqrli1bpvPnz+vBBx+0OhqAVqqrq9OxY8e8z0+cOKH9+/era9euSkxMtDAZggGnnqNDeeGFF/Rf//VfcrlcGjJkiFasWKERI0ZYHQtAK23btk2jR4++ZDw7O1tr165t/0AIKpQdAABgNI7ZAQAARqPsAAAAo1F2AACA0Sg7AADAaJQdAABgNMoOAAAwGmUHAAAYjbIDAACMRtkBAABGo+wAAACjUXYAQFJDQ4PVEQC0EcoOgKD2u9/9TikpKYqKilK3bt2Umpqq8+fPS5J+9atf6aabblJkZKR69eqlGTNmeF9XVlamCRMmKCYmRg6HQ9/73vdUWVnpXb5w4UINGTJEv/zlL9WvXz/Z7XZJ0rlz5/Sv//qv6tGjhxwOh8aMGaMDBw6070YDCCjKDoCgderUKU2ZMkU//OEP9eGHH2rbtm2aPHmyPB6PXnrpJeXm5uqhhx7SwYMH9ec//1n9+/eXJDU3N2vChAk6e/astm/frqKiIn388ce67777fN7/2LFj+v3vf68//OEP2r9/vyTp3nvvVVVVlQoLC7Vv3z7dcsstGjt2rM6ePdvemw8gQLjrOYCg9d5772no0KH65JNPlJSU5LPs2muv1YMPPqinn376ktcVFRUpIyNDJ06cUEJCgiTp8OHDuummm/TOO+9o+PDhWrhwoZ599ll9+umn6tGjhyRp586dGjdunKqqqhQZGel9v/79++vHP/6xHnrooTbcWgBtJczq
AABwJTfffLPGjh2rlJQUpaenKy0tTffcc48aGxtVUVGhsWPHXvZ1H374oRISErxFR5KSk5PVpUsXffjhhxo+fLgkKSkpyVt0JOnAgQOqq6tTt27dfN7v888/1/Hjx9tgCwG0B8oOgKAVGhqqoqIilZSUaMuWLfr5z3+un/70pyouLg7I+0dHR/s8r6urU69evbRt27ZL1u3SpUtAPhNA+6PsAAhqNptNt99+u26//XY98cQTSkpKUlFRkfr27avi4mKNHj36ktcMHDhQ5eXlKi8v99mNde7cOSUnJ1/xs2655Ra5XC6FhYWpb9++bbVJANoZZQdA0NqzZ4+Ki4uVlpamnj17as+ePTp9+rQGDhyohQsX6j/+4z/Us2dPZWRkqLa2Vrt27dLMmTOVmpqqlJQUZWVladmyZbpw4YJ+9KMf6V/+5V80bNiwK35eamqqnE6nJk6cqMWLF+uGG25QRUWFNm3apEmTJn3lawEEL8oOgKDlcDi0Y8cOLVu2TDU1NUpKStLzzz+vjIwMSZLb7dbSpUv16KOPqnv37rrnnnskfTEb9Kc//UkzZ87UqFGjFBISojvvvFM///nPv/LzbDabNm/erJ/+9Kd68MEHdfr0acXHx2vUqFGKi4tr8+0F0DY4GwsAABiN6+wAAACjUXYAAIDRKDsAAMBolB0AAGA0yg4AADAaZQcAABiNsgMAAIxG2QEAAEaj7AAAAKNRdgAAgNEoOwAAwGj/D1KB2Cj6mSj+AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='score', y='avg_chunk_len', data=pred_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "7c23c763-f6cf-43b1-9659-8617ce1ddb8e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T14:15:06.497159Z",
     "iopub.status.busy": "2024-12-04T14:15:06.496994Z",
     "iopub.status.idle": "2024-12-04T14:15:06.569527Z",
     "shell.execute_reply": "2024-12-04T14:15:06.569078Z",
     "shell.execute_reply.started": "2024-12-04T14:15:06.497146Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='score', ylabel='max_chunk_len'>"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA1AElEQVR4nO3de3RU9b3//9ckkEwEJikBJmASpNUKacP9NqCeKpGIaJWLgsmhqXL0SAMi8UL5VvFa8NDVKFaQQ0vFrjJStGILGiSkAtbEEEJRRKXacjrEMIkmJgMsJ4HM/P7wl1mOEEknM9mTnedjrVmL+ew9s99bGOc1n70/n4/F7/f7BQAAYFIxRhcAAAAQSYQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaj2MLiAa+Hw+VVdXq0+fPrJYLEaXAwAA2sHv9+vEiRMaNGiQYmLa7r8h7Eiqrq5WWlqa0WUAAIAQHDt2TKmpqW1uJ+xI6tOnj6Qv/2PZbDaDqwEAAO3h8XiUlpYW+B5vi6Fh56KLLtK//vWvs9p/8pOfaM2aNfJ6vbrnnnu0efNmNTU1KTs7W2vXrpXdbg/s63K5tGDBAr3xxhvq3bu38vLytHLlSvXo0f5Ta710ZbPZCDsAAHQx57sFxdAblCsqKnT8+PHAo7i4WJJ00003SZKWLFmibdu26cUXX9SePXtUXV2tmTNnBl7f0tKi6dOnq7m5WaWlpXr++ee1ceNGLV++3JDzAQAA0ccSTQuB3n333dq+fbs++ugjeTwe9e/fX06nU7Nnz5Ykffjhhxo2bJjKyso0ceJEFRUV6brrrlN1dXWgt2fdunVaunSpPv30U8XFxbXruB6PR4mJiWpsbKRnBwCALqK9399RM/S8ublZv//973XbbbfJYrGosrJSp0+fVlZWVmCfoUOHKj09XWVlZZKksrIyZWZmBl3Wys7Olsfj0eHDh9s8VlNTkzweT9ADAACYU9SEnVdeeUUNDQ368Y9/LElyu92Ki4tTUlJS0H52u11utzuwz1eDTuv21m1tWblypRITEwMPRmIBAGBeURN2NmzYoGnTpmnQoEERP9ayZcvU2NgYeBw7dizixwQAAMaIiqHn//rXv7Rr1y69/PLLgbaUlBQ1NzeroaEhqHenpqZGKSkpgX327dsX9F41NTWBbW2Jj49XfHx8GM8AAABEq6jo2Xnuuec0YMAATZ8+PdA2ZswY9ezZUyUlJYG2I0eOyOVyyeFwSJIcDocOHTqk2trawD7FxcWy2WzKyMjovBMAAABRy/CeHZ/Pp+eee055eXlBc+MkJiZq/vz5KigoUN++fWWz2bRo0SI5HA5NnDhRkjR16lRlZGRo3rx5WrVqldxutx544AHl5+fTcwMAACRFQdjZtWuXXC6XbrvttrO2Pfnkk4qJidGsWbOCJhVsFRsbq+3bt2vBggVyOBzq1auX8vLy9Oijj3bmKQAAgCgWVfPsGIV5dgDAHEpLS7V69WotXrxYkyZNMrocRFiXm2cHAICO8Hq9KiwsVE1NjQoLC+X1eo0uCVGCsAMAMIVNmzaprq5OklRXVyen02lwRYgWhB0AQJdXVVUlp9Op1jsz/H6/nE6nqqqqDK4M0YCwAwDo0vx+v1avXt1mO7emgrADAOjSXC6XKioq1NLSEtTe0tKiiooKuVwugypDtCDsAAC6tPT0dI0bN06xsbFB7bGxsRo/frzS09MNqgzRgrADAOjSLBaLFi9e3Ga7xWIxoCpEE8IOup0NGzboqquu0oYNG4wuBUCYpKamKicnJ6gtJydHF154oUEVIZoQdtCtNDQ0aNOmTfL5fNq0aZMaGhqMLglAmFx33XVBz7+63iK6N8IOupUHH3xQPp9P0pfrsi1fvtzgigCEy+OPPx70/Oc//7lBlSDaEHbQbezfv1+HDh0Kanv33Xe1f/9+gyoCEC58vvFNWBtLrI3VHfh8Pt14443y
eDxnbbPZbHrllVcUE0P2B7oiPt/dF2tjAV9RXl5+zv8RSl9+WMrLyzu5IgDhwucb50PYQbcwYcKENlN/YmKiJkyY0MkVAQgXPt84H8IOuoWYmJg2b0Z+6KGH6OIGujA+3zgf/gWg2xg7dqwyMzOD2oYPH67Ro0cbVBGAcOHzjW9C2EG38thjjwV+5cXExOjRRx81uCIA4cLnG20h7KBbSUpKUm5urmJiYpSbm6ukpCSjSwIQJny+0RaGnouh5wAAdEUMPQcAABBhBwAAmBxhBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmJrhYeeTTz7Rf/7nfyo5OVkJCQnKzMzU/v37A9v9fr+WL1+ugQMHKiEhQVlZWfroo4+C3qO+vl65ubmy2WxKSkrS/PnzdfLkyc4+FQAAEIUMDTuff/65Jk+erJ49e6qoqEjvv/++fvnLX+pb3/pWYJ9Vq1bp6aef1rp161ReXq5evXopOztbXq83sE9ubq4OHz6s4uJibd++XXv37tUdd9xhxCkBAIAoY/H7/X6jDv7Tn/5Ub731lt58881zbvf7/Ro0aJDuuece3XvvvZKkxsZG2e12bdy4UXPnztUHH3ygjIwMVVRUaOzYsZKkHTt26Nprr1VVVZUGDRp03jo8Ho8SExPV2Ngom80WvhMEAAAR097vb0N7dv785z9r7NixuummmzRgwACNGjVKv/71rwPbjx49KrfbraysrEBbYmKiJkyYoLKyMklSWVmZkpKSAkFHkrKyshQTE6Py8vJzHrepqUkejyfoAQAAzMnQsPPPf/5Tzz77rC655BK9/vrrWrBgge666y49//zzkiS32y1JstvtQa+z2+2BbW63WwMGDAja3qNHD/Xt2zewz9etXLlSiYmJgUdaWlq4Tw0AAEQJQ8OOz+fT6NGjtWLFCo0aNUp33HGHbr/9dq1bty6ix122bJkaGxsDj2PHjkX0eAAAwDiGhp2BAwcqIyMjqG3YsGFyuVySpJSUFElSTU1N0D41NTWBbSkpKaqtrQ3afubMGdXX1wf2+br4+HjZbLagBwAAMCdDw87kyZN15MiRoLa///3vGjx4sCRpyJAhSklJUUlJSWC7x+NReXm5HA6HJMnhcKihoUGVlZWBff7yl7/I5/NpwoQJnXAWAAAgmvUw8uBLlizRpEmTtGLFCt18883at2+f1q9fr/Xr10uSLBaL7r77bj3++OO65JJLNGTIED344IMaNGiQbrzxRklf9gRdc801gctfp0+f1sKFCzV37tx2jcQCAADmZujQc0navn27li1bpo8++khDhgxRQUGBbr/99sB2v9+vhx56SOvXr1dDQ4Muu+wyrV27Vt/97ncD+9TX12vhwoXatm2bYmJiNGvWLD399NPq3bt3u2pg6DkAAF1Pe7+/DQ870YCwAwBA19Ml5tkBAACINMIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwtR5GFwAAMJbf75fX6zW6jA7z+/1qamqSJMXHx8tisRhcUcdYrdYufw7RgrADAN2c1+vVtGnTjC4DX1NUVKSEhASjyzAFLmMBAABTo2cHALo5q9WqoqIio8voMK/XqxkzZkiStm7dKqvVanBF
HdPV648mhB0A6OYsFovpLpdYrVbTnRNCx2UsAABgaoQdAABgaoQdAABgaoQdAABgatygjHZh0rHoxKRjAHB+hB20C5OORScmHQOA8+MyFgAAMDV6dtAuTDoWnbp6/QDQGQg7aBcmHQMAdFVcxgIAAKZG2AEAAKZG2AEAAKZG2AEAAKbGDcqdwCwT8pnBV/8e+DuJHkyOCCCSCDudgAn5olPrEHQYrytPjsiPmejBj5noFA0/ZgwNOw8//LAeeeSRoLZLL71UH374oaQv/7Hec8892rx5s5qampSdna21a9fKbrcH9ne5XFqwYIHeeOMN9e7dW3l5eVq5cqV69CDHAYg8fsxEJ37MRI9o+DFjeCL43ve+p127dgWefzWkLFmyRK+++qpefPFFJSYmauHChZo5c6beeustSVJLS4umT5+ulJQUlZaW6vjx4/rRj36knj17asWKFZ1+Lu1xcuQt8scY/p+9+/L7Jd+ZL/8c00Pi0olhLL4z6n3wBaPLANANGP6t26NHD6WkpJzV3tjYqA0bNsjpdOqqq66SJD333HMaNmyY3n77bU2cOFE7d+7U+++/r127dslut2vkyJF67LHHtHTpUj388MOKi4vr7NM5L39MDym2p9FldHPR9++iO/IbXUAEPHNZveJjzXhmXYPfLzX7vvxzXAy/ZYzU1GLRwr/2NbqMAMPDzkcffaRBgwbJarXK4XBo5cqVSk9PV2VlpU6fPq2srKzAvkOHDlV6errKyso0ceJElZWVKTMzM+iyVnZ2thYsWKDDhw9r1KhR5zxmU1NTYOVrSfJ4PJE7wa9rOd15xwKimQk/C/GxfsXHGl1F98YCKtEiukK/oWFnwoQJ2rhxoy699FIdP35cjzzyiC6//HK99957crvdiouLU1JSUtBr7Ha73G63JMntdgcFndbtrdvasnLlyrPuFeosfd7ZbMhxAQDorgwNO1+9qW/48OGaMGGCBg8erC1btkT0ZqZly5apoKAg8Nzj8SgtLS1ixwMAAMYx/DLWVyUlJem73/2uPv74Y1199dVqbm5WQ0NDUO9OTU1N4B6flJQU7du3L+g9ampqAtvaEh8fr/j4+PCfQDucGDGXe3YASWo5TU8ngE4RVWHn5MmT+sc//qF58+ZpzJgx6tmzp0pKSjRr1ixJ0pEjR+RyueRwOCRJDodDP//5z1VbW6sBAwZIkoqLi2Wz2ZSRkWHYeXyj2J6EHQAAOpGhYefee+/V9ddfr8GDB6u6uloPPfSQYmNjdcsttygxMVHz589XQUGB+vbtK5vNpkWLFsnhcGjixImSpKlTpyojI0Pz5s3TqlWr5Ha79cADDyg/P9+wnhsAABBdDA07VVVVuuWWW1RXV6f+/fvrsssu09tvv63+/ftLkp588knFxMRo1qxZQZMKtoqNjdX27du1YMECORwO9erVS3l5eXr00UeNOiUA3VhTi9EVANEh2j4LhoadzZu/+Xq91WrVmjVrtGbNmjb3GTx4sF577bVwlwYA/7aFf002ugQA58Cq5wAAwNSi6gZlAOjKnrmsjkkFAX15GSuaejpDDjsNDQ3at2+famtr5fP5grb96Ec/6nBhANDVxMeKsANEoZDCzrZt25Sbm6uTJ0/KZrMFLd1usVgIOwAAIGqEdM/OPffco9tuu00nT55UQ0ODPv/888Cjvr4+3DUCAACELKSenU8++UR33XWXLrjggnDXY3oW35koWx6tm/H7Jd+ZL/8c04NlkQ1kaf17AIAICynsZGdna//+/fr2t78d7npMr/fBF4wuAQCAbiWksDN9+nTdd999ev/995WZmamePYOXP/jhD38YluIAAAA6KqSwc/vtt0vSOWcqtlgsammJsqkTDWa1WlVUVGR0GZDk9Xo1Y8YMSdLWrVtltVoNrgiS+HsAEFEhhZ2vDzXHN7NYLEpISDC6DHyN1Wrl7wUAuoEOTyro9Xr5VdYN+P1+eb1eo8vosK+egxnOx2q1Bk39AAA4W0hh
p6WlRStWrNC6detUU1Ojv//97/r2t7+tBx98UBdddJHmz58f7jphMK/Xq2nTphldRli1Xs7qyoqKiuidAoDzCGmenZ///OfauHGjVq1apbi4uED797//ff3mN78JW3EAAAAdFVLPzu9+9zutX79eU6ZM0Z133hloHzFihD788MOwFYfoYZabrP1+v5qamiRJ8fHxXf4SEJeQAeD8Qp5U8OKLLz6r3efz6fTp0x0uCtHHTDdZMxkmAHQvIV3GysjI0JtvvnlW+0svvaRRo0Z1uCgAAIBwCalnZ/ny5crLy9Mnn3win8+nl19+WUeOHNHvfvc7bd++Pdw1AgAAhCyknp0bbrhB27Zt065du9SrVy8tX75cH3zwgbZt26arr7463DUCAACELOR5di6//HIVFxeHsxYAAICwC6lnBwAAoKtod8/Ot771rXYP062vrw+5IAAAgHBqd9h56qmnIlgGAABAZLQ77OTl5f3bb/7EE0/ozjvvVFJS0r/9WgAAgHCI6D07K1as4JIWAAAwVETDjt/vj+TbAwAAnBejsQAAgKkRdgAAgKkRdgAAgKkRdgAAgKmFPex88cUXgT9ffvnlSkhICPchgA4pLS3VnDlzVFpaanQpAIBOEFLYueuuu87ZfurUKV177bWB56+99poGDhwYWmVABHi9XhUWFqqmpkaFhYXyer1GlwQAiLCQws6rr76qhx56KKjt1KlTuuaaa3TmzJmwFAZEwqZNm1RXVydJqqurk9PpNLgiAECkhRR2du7cqV//+teBJSROnDihq6++WhaLRTt27AhnfUDYVFVVyel0BuZ/8vv9cjqdqqqqMrgyAEAktXu5iK/6zne+ox07dujKK69UTEyMXnjhBcXHx+vVV19Vr169wl0j0GF+v1+rV69us33VqlXtXugWANC1hHyD8vDhw7V9+3b9v//3/3TBBReoqKiIoIOo5XK5VFFRoZaWlqD2lpYWVVRUyOVyGVQZACDS2t2zM2rUqHP+8o2Pj1d1dbUmT54caDtw4EB4qgPCJD09XePGjdOBAweCAk9sbKzGjBmj9PR0A6sDAERSu8POjTfeGMEygMiyWCxavHix8vLyztnOJSwAMK92h52vj74CuprU1FRlZGTo0KFDgbaMjAxdeOGFBlYFAIi0kG5QbtXc3Kza2lr5fL6gdi4JIBpVVVXp8OHDQW2HDx9WVVWVUlNTDaoKABBpId2g/Pe//z0wO/LgwYM1ZMgQDRkyRBdddJGGDBkS7hqBDmsddXWuy1WrV68ODEcHAJhPSD07t956q3r06KHt27dr4MCB3O+AqNc6Guvrvjoaa/DgwQZUBgCItJDCzsGDB1VZWamhQ4eGux4gIhiNBQDdV0iXsTIyMvTZZ5+FuxYgYlpHXbXVTu8kAJhXSD07//M//6P7779fK1asUGZmpnr27Bm03WazhaU4IJxSU1OVk5Oj3//+9/L7/bJYLMrJyWE0FsKmqcUiifu/jOL3S83//3iZuBiJ3zDG+fKzED1CCjtZWVmSpClTpgS1t36BfH2WWiBa5ObmqqioSJ999pn69eunnJwco0uCiSz8a1+jSwBwDiGFnTfeeCPcdQCdwmq1qqCgQKtXr9bixYtltVqNLgkAEGEWP2Nu5fF4lJiYqMbGRi7BAfi3+P1+eb1eo8uAJK/XqxkzZkiStm7dyo+ZKGG1WiN2X2R7v79D6tnZu3fvN26/4oorQnlboFOUlpYGenYmTZpkdDno4iwWixISEowuA19jtVr5e0FASGHnBz/4wVltX01t3LODaOX1elVYWKjPPvtMhYWFGj16NL/+AMDkQhp6/vnnnwc9amtrtWPHDo0bN047d+4Md41A2GzatCkwbcJnn30mp9NpcEUAgEgLKewkJiYGPfr166err746MCQ9VE888YQsFovuvvvuQJvX61V+fr6Sk5PVu3dvzZo1SzU1NUGvc7lcmj59ui644AINGDBA9913n86cORNyHTCnqqoqbdq0
Kaht06ZNqqqqMqgiAEBnCCnstMVut+vIkSMhvbaiokL/+7//q+HDhwe1L1myRNu2bdOLL76oPXv2qLq6WjNnzgxsb2lp0fTp09Xc3KzS0lI9//zz2rhxo5YvX96hc4G5tK6N9fX78X0+H2tjAYDJhXTPzrvvvhv03O/36/jx43riiSc0cuTIf/v9Tp48qdzcXP3617/W448/HmhvbGzUhg0b5HQ6ddVVV0mSnnvuOQ0bNkxvv/22Jk6cqJ07d+r999/Xrl27ZLfbNXLkSD322GNaunSpHn74YcXFxYVyijCZttbG8vv9rI0FACYXUs/OyJEjNWrUKI0cOTLw52uvvVbNzc36zW9+82+/X35+vqZPnx6YrLBVZWWlTp8+HdQ+dOhQpaenq6ysTJJUVlamzMxM2e32wD7Z2dnyeDw6fPjwOY/X1NQkj8cT9IC5paWltTks0WazKS0trZMrAgB0lpB6do4ePRr0PCYmRv379w9pVMvmzZt14MCBc/7qdrvdiouLU1JSUlC73W6X2+0O7PPVoNO6vXXbuaxcuVKPPPLIv10ruq5jx461GWo9Ho+OHTtGzw4AmFRIYSdcXwrHjh3T4sWLVVxc3KnDf5ctW6aCgoLAc4/Hwy97k2td9Xz//v1B9+dYLBaNGzeOVc8BwMRCCjuSVFJSopKSEtXW1srn8wVt++1vf9uu96isrFRtba1Gjx4daGtpadHevXv1zDPP6PXXX1dzc7MaGhqCendqamqUkpIiSUpJSdG+ffuC3rd1tFbrPl8XHx+v+Pj4dtUIc2hd3TwvLy9oHqjY2FhWPQcAkwvpnp1HHnlEU6dOVUlJiT777LOz5t1prylTpujQoUM6ePBg4DF27Fjl5uYG/tyzZ0+VlJQEXnPkyBG5XC45HA5JksPh0KFDh1RbWxvYp7i4WDabTRkZGaGcHkyqddXz1mDDqucA0D2E1LOzbt06bdy4UfPmzevQwfv06aPvf//7QW29evVScnJyoH3+/PkqKChQ3759ZbPZtGjRIjkcDk2cOFGSNHXqVGVkZGjevHlatWqV3G63HnjgAeXn59N7g7Pk5ubqlVde0YkTJ9SnTx9WPQeAbiCksNPc3Nxpawo9+eSTiomJ0axZs9TU1KTs7GytXbs2sD02Nlbbt2/XggUL5HA41KtXL+Xl5enRRx/tlPrQ9XDJCgC6l5BWPV+6dKl69+6tBx98MBI1dTpWPe8+NmzYoN///vfy+/2yWCyaN2+ebrvtNqPLAhAGX3zxhaZNmyZJKioqYiHQbiDsq55/dfSSz+fT+vXrtWvXLg0fPlw9e/YM2rewsDCEkoHIqqqqktPpDIzG8vv9cjqdmjp1qlJTUw2uDgAQKe0OO3/729+CnrfOlPzee+8FtXOJANGodbmIttpXrVrFv10AMKl2h5033ngjknUAEdXWchEtLS0sFwEAJhfS0PPGxkbV19ef1V5fX8/SC4hKrZMKxsbGBrXHxsZq/PjxTCoIACYWUtiZO3euNm/efFb7li1bNHfu3A4XBYRb66SCbbVzCQsAzCuksFNeXq4rr7zyrPYf/OAHKi8v73BRQCQwqSAAdE8hhZ2mpiadOXPmrPbTp0/riy++6HBRQKTk5uYqOTlZktSvXz8mFQSAbiCksDN+/HitX7/+rPZ169ZpzJgxHS4KiBSr1aqCggLZ7XYtWbKkUxegBQAYI6QZlB9//HFlZWXpnXfe0ZQpUyR9uTBoRUWFdu7cGdYCgXCbNGlSp80ADgAwXkg9O5MnT1ZZWZnS0tK0ZcsWbdu2TRdffLHeffddXX755eGuEQAAIGQh9exIX04quGnTpm/c54knntCdd96ppKSkUA8DAADQISH17LTXihUrzjkfDwAAQGeJaNgJYY1RAACAsIpo2AEAADAaYQcAAJgaYQcAAJgaYQcAAJhaRMPO5ZdfroSEhEgeAgAA4BuFFHY2btx4zvYzZ85o2bJlgeevvfaaBg4cGFJhAAAA4RBS2Lnr
rrt000036fPPPw+0HTlyRBMmTNALL7wQtuIAAAA6KqSw87e//U1VVVXKzMxUcXGx1qxZo9GjR2vo0KF65513wl0jAABAyEJaLuI73/mO3nrrLd1999265pprFBsbq+eff1633HJLuOsDAADokJBvUH711Ve1efNmORwOJSUlacOGDaqurg5nbQAAAB0WUtj57//+b910001aunSp3nzzTb377ruKi4tTZmamtmzZEu4aAQAAQhbSZay33npL5eXlGjFihCQpJSVFr732mtasWaPbbrtNN998c1iLBAAACFVIYaeyslLx8fFntefn5ysrK6vDRQEAAIRLSJexzhV0Wl166aUhFwMAABBuIfXsSNJLL72kLVu2yOVyqbm5OWjbgQMHOlwYAABAOITUs/P000/r1ltvld1u19/+9jeNHz9eycnJ+uc//6lp06aFu0YAAICQhRR21q5dq/Xr1+tXv/qV4uLidP/996u4uFh33XWXGhsbw10jAABAyEIKOy6XS5MmTZIkJSQk6MSJE5KkefPmsVwEAACIKiGFnZSUFNXX10uS0tPT9fbbb0uSjh49Kr/fH77qAAAAOiiksHPVVVfpz3/+syTp1ltv1ZIlS3T11Vdrzpw5mjFjRlgLBAAA6IiQRmOtX79ePp9P0pdz6/Tr109vvfWWfvjDH+rOO+8Ma4EAAAAdEVLYiYmJUXNzsw4cOKDa2lolJCQEJhPcsWOHrr/++rAWCQAAEKqQws6OHTs0b9481dXVnbXNYrGopaWlw4UBAACEQ0j37CxatEg333yzjh8/Lp/PF/Qg6AAAgGgSUtipqalRQUGB7HZ7uOsBAAAIq5DCzuzZs7V79+4wlwIAABB+Id2z88wzz+imm27Sm2++qczMTPXs2TNo+1133RWW4gAAADoqpLDzwgsvaOfOnbJardq9e7csFktgm8ViIewAAICoEVLY+dnPfqZHHnlEP/3pTxUTE9KVMAAAgE4RUlJpbm7WnDlzCDoAACDqhZRW8vLy9Ic//CHctQAAAIRdSJexWlpatGrVKr3++usaPnz4WTcoFxYWhqU4AACAjgop7Bw6dEijRo2SJL333ntB2756szIAAIDRQgo7b7zxRrjrAAAAiAjuMAYAAKZG2AEAAKZG2AEAAKZmaNh59tlnNXz4cNlsNtlsNjkcDhUVFQW2e71e5efnKzk5Wb1799asWbNUU1MT9B4ul0vTp0/XBRdcoAEDBui+++7TmTNnOvtUAABAlDI07KSmpuqJJ55QZWWl9u/fr6uuuko33HCDDh8+LElasmSJtm3bphdffFF79uxRdXW1Zs6cGXh9S0uLpk+frubmZpWWlur555/Xxo0btXz5cqNOCQAARBmL3+/3G13EV/Xt21e/+MUvNHv2bPXv319Op1OzZ8+WJH344YcaNmyYysrKNHHiRBUVFem6665TdXW17Ha7JGndunVaunSpPv30U8XFxZ3zGE1NTWpqago893g8SktLU2Njo2w2W+RPEgAQdl988YWmTZsmSSoqKlJCQoLBFSHSPB6PEhMTz/v9HTX37LS0tGjz5s06deqUHA6HKisrdfr0aWVlZQX2GTp0qNLT01VWViZJKisrU2ZmZiDoSFJ2drY8Hk+gd+hcVq5cqcTExMAjLS0tcicGAAAMZXjYOXTokHr37q34+Hjdeeed2rp1qzIyMuR2uxUXF6ekpKSg/e12u9xutyTJ7XYHBZ3W7a3b2rJs2TI1NjYGHseOHQvvSQEAgKgR0qSC4XTppZfq4MGDamxs1EsvvaS8vDzt2bMnoseMj49XfHx8RI8BAACig+FhJy4uThdffLEkacyYMaqoqNDq1as1Z84cNTc3q6GhIah3p6amRikpKZKklJQU7du3L+j9Wkdrte4DAAC6N8MvY32dz+dTU1OTxowZo549e6qkpCSw7ciRI3K5XHI4HJIkh8OhQ4cOqba2NrBPcXGxbDabMjIyOr12AAAQfQzt2Vm2bJmmTZum
9PR0nThxQk6nU7t379brr7+uxMREzZ8/XwUFBerbt69sNpsWLVokh8OhiRMnSpKmTp2qjIwMzZs3T6tWrZLb7dYDDzyg/Px8LlMBAABJBoed2tpa/ehHP9Lx48eVmJio4cOH6/XXX9fVV18tSXryyScVExOjWbNmqampSdnZ2Vq7dm3g9bGxsdq+fbsWLFggh8OhXr16KS8vT48++qhRpwQAAKJM1M2zY4T2jtMHADPy+/3yer1Gl9FhXq9XM2bMkCRt3bpVVqvV4Io6xmq1ymKxGF1GVGvv97fhNygDAIzl9XoDk/GZRWvo6cqYGDF8ou4GZQAAgHCiZwcAujmr1Rq0CHNX5ff7A0sBxcfHd/lLQF39Mlw0IewAQDdnsVhMc7nkggsuMLoERCEuYwEAAFMj7AAATKO0tFRz5sxRaWmp0aUgihB2AACm4PV6VVhYqJqaGhUWFppiOD3Cg7ADADCFTZs2qa6uTpJUV1cnp9NpcEWIFoQdAECXV1VVJafTqdZ5cv1+v5xOp6qqqgyuDNGAsAMA6NL8fr9Wr17dZjsLBYCwAwDo0lwulyoqKtTS0hLU3tLSooqKCrlcLoMqQ7Qg7AAAurT09HRlZmaec9vw4cOVnp7eyRUh2hB2AACmxSUsSIQdAEAX53K5dOjQoXNuO3ToEJexQNgBAHRt6enpGjdunGJigr/SYmJiNH78eC5jgbADAOjaLBaLFi9efNbCnzExMedsR/dD2AEAdHmpqanKyckJBBuLxaKcnBxdeOGFBleGaEDYAQCYwqxZs4LCzsyZMw2uCNGCsAMAMIU//vGP8vl8kiSfz6eXX37Z4IoQLQg7AIAur3W5iK9iuQi0IuwAALo0lovA+RB2AABdGstF4HwIOwCALq11np3Y2Nig9tjYWObZgSTCDgCgi2udZ6etdubZAWEHANDlMc8OvglhBwBgCrm5uUpOTpYk9evXTzk5OQZXhGhB2AEAmILValVBQYHsdruWLFkiq9VqdEmIEj2MLgAAgHCZNGmSJk2aZHQZiDL07AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMzNOysXLlS48aNU58+fTRgwADdeOONOnLkSNA+Xq9X+fn5Sk5OVu/evTVr1izV1NQE7eNyuTR9+nRdcMEFGjBggO677z6dOXOmM08FAABEKUPDzp49e5Sfn6+3335bxcXFOn36tKZOnapTp04F9lmyZIm2bdumF198UXv27FF1dbVmzpwZ2N7S0qLp06erublZpaWlev7557Vx40YtX77ciFMCAABRxuL3+/1GF9Hq008/1YABA7Rnzx5dccUVamxsVP/+/eV0OjV79mxJ0ocffqhhw4aprKxMEydOVFFRka677jpVV1fLbrdLktatW6elS5fq008/VVxc3HmP6/F4lJiYqMbGRtlstoieIwAACI/2fn9H1T07jY2NkqS+fftKkiorK3X69GllZWUF9hk6dKjS09NVVlYmSSorK1NmZmYg6EhSdna2PB6PDh8+fM7jNDU1yePxBD0AAIA5RU3Y8fl8uvvuuzV58mR9//vflyS53W7FxcUpKSkpaF+73S632x3Y56tBp3V767ZzWblypRITEwOPtLS0MJ8NAACIFlETdvLz8/Xee+9p8+bNET/WsmXL1NjYGHgcO3Ys4scEAADG6GF0AZK0cOFCbd++XXv37lVqamqgPSUlRc3NzWpoaAjq3ampqVFKSkpgn3379gW9X+tordZ9vi4+Pl7x8fFhPgsAABCNDO3Z8fv9WrhwobZu3aq//OUvGjJkSND2MWPGqGfPniopKQm0HTlyRC6XSw6HQ5LkcDh06NAh1dbWBvYpLi6WzWZTRkZG55wIAACIWob2
7OTn58vpdOpPf/qT+vTpE7jHJjExUQkJCUpMTNT8+fNVUFCgvn37ymazadGiRXI4HJo4caIkaerUqcrIyNC8efO0atUqud1uPfDAA8rPz6f3BgAAGDv03GKxnLP9ueee049//GNJX04qeM899+iFF15QU1OTsrOztXbt2qBLVP/617+0YMEC7d69W7169VJeXp6eeOIJ9ejRvizH0HMAALqe9n5/R9U8O0Yh7AAA0PV0yXl2AAAAwo2wAwAATI2wAwAwjdLSUs2ZM0elpaVGl4IoQtgBAJiC1+tVYWGhampqVFhYKK/Xa3RJiBKEHQCAKWzatEl1dXWSpLq6OjmdToMrQrQg7AAAuryqqio5nU61DjD2+/1yOp2qqqoyuDJEA8IOAKBL8/v9Wr16dZvtzLACwg4AoEtzuVyqqKhQS0tLUHtLS4sqKirkcrkMqgzRgrADAOjS0tPTNW7cOMXGxga1x8bGavz48UpPTzeoMkQLwg4AoEuzWCxavHhxm+1tLU2E7oOwAwDo8lJTU3XzzTcHtd1888268MILDaoI0YSwAwAATI2wAwDo8qqqqrRly5agti1btjD0HJIIOwCALo6h5zgfwg4AoEtj6DnOh7ADAOjSGHqO8yHsAAC6NIae43wIOwCALi81NVU5OTmBYGOxWJSTk8PQc0gi7AAATCI3N1fJycmSpH79+iknJ8fgihAtCDsAAFOwWq0qKCiQ3W7XkiVLZLVajS4JUaKH0QUAABAukyZN0qRJk4wuA1GGnh0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB0AAGBqhB10O6WlpZozZ45KS0uNLgUA0AkIO+hWvF6vCgsLVVNTo8LCQnm9XqNLAgBEGGEH3cqmTZtUV1cnSaqrq5PT6TS4IgBApBF20G1UVVXJ6XTK7/dLkvx+v5xOp6qqqgyuDAAQSYQddAt+v1+rV69us701AAEAzIewg27B5XKpoqJCLS0tQe0tLS2qqKiQy+UyqDIAQKQRdtAtpKena9y4cYqNjQ1qj42N1fjx45Wenm5QZQCASCPsoFuwWCxavHhxm+0Wi8WAqgAAnYGwg24jNTVVOTk5gWBjsViUk5OjCy+80ODKAACRRNhBt5Kbm6vk5GRJUr9+/ZSTk2NwRQCASCPsoFuxWq0qKCiQ3W7XkiVLZLVajS4JABBhPYwuAOhskyZN0qRJk4wuAwDQSejZAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApmZo2Nm7d6+uv/56DRo0SBaLRa+88krQdr/fr+XLl2vgwIFKSEhQVlaWPvroo6B96uvrlZubK5vNpqSkJM2fP18nT57sxLMAAADRzNCwc+rUKY0YMUJr1qw55/ZVq1bp6aef1rp161ReXq5evXopOztbXq83sE9ubq4OHz6s4uJibd++XXv37tUdd9zRWacAAACinMXv9/uNLkL6cp2irVu36sYbb5T0Za/OoEGDdM899+jee++VJDU2Nsput2vjxo2aO3euPvjgA2VkZKiiokJjx46VJO3YsUPXXnutqqqqNGjQoHMeq6mpSU1NTYHnHo9HaWlpamxslM1mi+yJAgCAsPB4PEpMTDzv93fUzqB89OhRud1uZWVlBdoSExM1YcIElZWVae7cuSorK1NSUlIg6EhSVlaWYmJiVF5erhkzZpzzvVeuXKlHHnnkrHaPxxP+EwEAABHR+r19vn6bqA07brdbkmS324Pa7XZ7YJvb7daAAQOCtvfo0UN9+/YN7HMuy5YtU0FBQeD5J598ooyMDKWlpYWrfAAA0ElOnDihxMTENrdHbdiJpPj4eMXHxwee9+7dW8eOHVOfPn1ksVgMrAydofWy5bFjx7hsCZgMn+/uxe/368SJE23ettIqasNOSkqKJKmmpkYDBw4MtNfU1GjkyJGBfWpra4Ned+bMGdXX1wde3x4x
MTFKTU3teNHoUmw2G/8zBEyKz3f38U09Oq2idp6dIUOGKCUlRSUlJYE2j8ej8vJyORwOSZLD4VBDQ4MqKysD+/zlL3+Rz+fThAkTOr1mAAAQfQzt2Tl58qQ+/vjjwPOjR4/q4MGD6tu3r9LT03X33Xfr8ccf1yWXXKIhQ4bowQcf1KBBgwIjtoYNG6ZrrrlGt99+u9atW6fTp09r4cKFmjt37nm7tAAAQPdgaNjZv3+/rrzyysDz1puG8/LytHHjRt1///06deqU7rjjDjU0NOiyyy7Tjh07ZLVaA6/ZtGmTFi5cqClTpigmJkazZs3S008/3enngq4jPj5eDz30UNB9WwDMgc83ziVq5tkBAACIhKi9ZwcAACAcCDsAAMDUCDsAAMDUCDsAAMDUCDvoVtasWaOLLrpIVqtVEyZM0L59+4wuCUAY7N27V9dff70GDRoki8WiV155xeiSEEUIO+g2/vCHP6igoEAPPfSQDhw4oBEjRig7O/usWbgBdD2nTp3SiBEjtGbNGqNLQRRi6Dm6jQkTJmjcuHF65plnJEk+n09paWlatGiRfvrTnxpcHYBwsVgs2rp1a2ACWoCeHXQLzc3NqqysVFZWVqAtJiZGWVlZKisrM7AyAECkEXbQLXz22WdqaWmR3W4Parfb7XK73QZVBQDoDIQdAABgaoQddAv9+vVTbGysampqgtpramqUkpJiUFUAgM5A2EG3EBcXpzFjxqikpCTQ5vP5VFJSIofDYWBlAIBIM3TVc6AzFRQUKC8vT2PHjtX48eP11FNP6dSpU7r11luNLg1AB508eVIff/xx4PnRo0d18OBB9e3bV+np6QZWhmjA0HN0K88884x+8YtfyO12a+TIkXr66ac1YcIEo8sC0EG7d+/WlVdeeVZ7Xl6eNm7c2PkFIaoQdgAAgKlxzw4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4ASGpubja6BAARQtgBENVeeuklZWZmKiEhQcnJycrKytKpU6ckSb/97W/1ve99T/Hx8Ro4cKAWLlwYeJ3L5dINN9yg3r17y2az6eabb1ZNTU1g+8MPP6yRI0fqN7/5jYYMGSKr1SpJamho0H/913+pf//+stlsuuqqq/TOO+907kkDCCvCDoCodfz4cd1yyy267bbb9MEHH2j37t2aOXOm/H6/nn32WeXn5+uOO+7QoUOH9Oc//1kXX3yxJMnn8+mGG25QfX299uzZo+LiYv3zn//UnDlzgt7/448/1h//+Ee9/PLLOnjwoCTppptuUm1trYqKilRZWanRo0drypQpqq+v7+zTBxAmrHoOIGodOHBAY8aM0f/93/9p8ODBQdsuvPBC3XrrrXr88cfPel1xcbGmTZumo0ePKi0tTZL0/vvv63vf+5727duncePG6eGHH9aKFSv0ySefqH///pKkv/71r5o+fbpqa2sVHx8feL+LL75Y999/v+64444Ini2ASOlhdAEA0JYRI0ZoypQpyszMVHZ2tqZOnarZs2fr9OnTqq6u1pQpU875ug8++EBpaWmBoCNJGRkZSkpK0gcffKBx48ZJkgYPHhwIOpL0zjvv6OTJk0pOTg56vy+++EL/+Mc/InCGADoDYQdA1IqNjVVxcbFKS0u1c+dO/epXv9LPfvYzlZSUhOX9e/XqFfT85MmTGjhwoHbv3n3WvklJSWE5JoDOR9gBENUsFosmT56syZMna/ny5Ro8eLCKi4t10UUXqaSkRFdeeeVZrxk2bJiOHTumY8eOBV3GamhoUEZGRpvHGj16tNxut3r06KGLLrooUqcEoJMRdgBErfLycpWUlGjq1KkaMGCAysvL9emnn2rYsGF6+OGHdeedd2rAgAGaNm2aTpw4obfeekuLFi1SVlaWMjMzlZubq6eeekpnzpzRT37yE/3Hf/yHxo4d2+bxsrKy5HA4dOONN2rVqlX67ne/
q+rqar366quaMWPGN74WQPQi7ACIWjabTXv37tVTTz0lj8ejwYMH65e//KWmTZsmSfJ6vXryySd17733ql+/fpo9e7akL3uD/vSnP2nRokW64oorFBMTo2uuuUa/+tWvvvF4FotFr732mn72s5/p1ltv1aeffqqUlBRdccUVstvtET9fAJHBaCwAAGBqzLMDAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABM7f8Du9uwvv6+kAcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='score', y='max_chunk_len', data=pred_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "15203bbd-3a37-42b5-b660-c39e2f4e3675",
   "metadata": {},
   "source": [
    "从最终打分来看，错误/正确的问题，对应的平均上下文长度已经没有显著差异了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fc0c11c-be15-47dc-88bf-31b0192b4622",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 1058.563616,
   "end_time": "2024-11-23T14:46:37.625874",
   "environment_variables": {},
   "exception": null,
   "input_path": "13_contextual_embeddings.ipynb",
   "output_path": "run_13_contextual_embeddings.ipynb",
   "parameters": {},
   "start_time": "2024-11-23T14:28:59.062258",
   "version": "2.6.0"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {
     "0cd8c168767249f2a5fa412173f6e751": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ce1d1d9d86c40d9839877ff95734491",
       "max": 100,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_231702cf4d79477f9d5548665a1b18fe",
       "tabbable": null,
       "tooltip": null,
       "value": 100
      }
     },
     "2133bb8d85d34b8db112b4408ad60320": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "231702cf4d79477f9d5548665a1b18fe": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "23b1ad9c0f9c46c888da66e85c90eb84": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "24e6eadc3dc940ecabf30dd1a3c6d1f3": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_fa4bddf2c33241b5bf918054518f128f",
       "max": 52,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_edc33e82be8f41eba6a18a0ef074ab7a",
       "tabbable": null,
       "tooltip": null,
       "value": 52
      }
     },
     "2f60367b1c8941e2bf71661c33969ae8": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3865f25c78aa46f29a25d807205281c3": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3d0b06deaa654b989eece8cde06fa0f8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "3f8ceda83287475b97608e42f5f6782f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "4881e496f1c84fe29ce9ebebaddfb3c2": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bd096d5d219a467786a85cfe1613fedd",
        "IPY_MODEL_24e6eadc3dc940ecabf30dd1a3c6d1f3",
        "IPY_MODEL_bc2b8104b4244d8cacedeb95e800d91c"
       ],
       "layout": "IPY_MODEL_6b9a8e43c1c342dba500a14e7149b600",
       "tabbable": null,
       "tooltip": null
      }
     },
     "5ce1d1d9d86c40d9839877ff95734491": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "5ddb08be5cc64c9ab40a1d62a21763a5": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_86283159049d48b1adcfb2de2d404d4d",
       "placeholder": "​",
       "style": "IPY_MODEL_2133bb8d85d34b8db112b4408ad60320",
       "tabbable": null,
       "tooltip": null,
       "value": " 100/100 [08:34&lt;00:00, 10.01s/it]"
      }
     },
     "5ef9d83ccad1471f85335900a24a8553": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "6b9a8e43c1c342dba500a14e7149b600": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "816a079a8c804fbfa9b9a74f941abea8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bcc69ec5db1b4aab977807284c9290e7",
        "IPY_MODEL_0cd8c168767249f2a5fa412173f6e751",
        "IPY_MODEL_5ddb08be5cc64c9ab40a1d62a21763a5"
       ],
       "layout": "IPY_MODEL_d1178c6858284f788a80b5f2a14fd0b7",
       "tabbable": null,
       "tooltip": null
      }
     },
     "86283159049d48b1adcfb2de2d404d4d": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "8ff8262c56604119883f4a5f13bb74ab": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ef9d83ccad1471f85335900a24a8553",
       "placeholder": "​",
       "style": "IPY_MODEL_e89e77133c344fc48c1d62f5a607ec93",
       "tabbable": null,
       "tooltip": null,
       "value": " 8/8 [00:18&lt;00:00,  2.27s/it]"
      }
     },
     "9189a076554543aaa6f5ee04e40dbe1b": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "988e6697a2af486fadeaf0b84347b565": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_e1aae4c55cb64f379e74f15357275628",
        "IPY_MODEL_fd9e23198ca1489a9773fda3510bf857",
        "IPY_MODEL_8ff8262c56604119883f4a5f13bb74ab"
       ],
       "layout": "IPY_MODEL_d2ee15001d2244529f7e47d3333c0f8e",
       "tabbable": null,
       "tooltip": null
      }
     },
     "9fc7d91f94a94933bde5ba80e64587de": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "a7d240a289084bdfba4724c0efd5ab07": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "bc2b8104b4244d8cacedeb95e800d91c": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_2f60367b1c8941e2bf71661c33969ae8",
       "placeholder": "​",
       "style": "IPY_MODEL_9fc7d91f94a94933bde5ba80e64587de",
       "tabbable": null,
       "tooltip": null,
       "value": " 52/52 [04:26&lt;00:00,  4.22s/it]"
      }
     },
     "bcc69ec5db1b4aab977807284c9290e7": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3f8ceda83287475b97608e42f5f6782f",
       "placeholder": "​",
       "style": "IPY_MODEL_3d0b06deaa654b989eece8cde06fa0f8",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "bd096d5d219a467786a85cfe1613fedd": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3865f25c78aa46f29a25d807205281c3",
       "placeholder": "​",
       "style": "IPY_MODEL_9189a076554543aaa6f5ee04e40dbe1b",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "cc3ed8dc4a5c43aca7b62d904865b2fa": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "cf68b6fe24964ce792aa63827489cb97": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "d1178c6858284f788a80b5f2a14fd0b7": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "d2ee15001d2244529f7e47d3333c0f8e": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "e1aae4c55cb64f379e74f15357275628": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_23b1ad9c0f9c46c888da66e85c90eb84",
       "placeholder": "​",
       "style": "IPY_MODEL_cf68b6fe24964ce792aa63827489cb97",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "e89e77133c344fc48c1d62f5a607ec93": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "edc33e82be8f41eba6a18a0ef074ab7a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "fa4bddf2c33241b5bf918054518f128f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "fd9e23198ca1489a9773fda3510bf857": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_cc3ed8dc4a5c43aca7b62d904865b2fa",
       "max": 8,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_a7d240a289084bdfba4724c0efd5ab07",
       "tabbable": null,
       "tooltip": null,
       "value": 8
      }
     }
    },
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
